* [PULL 15/75] Revert "hw/acpi/ghes: Make ghes_record_cper_errors() static"
[not found] <cover.1759691708.git.mst@redhat.com>
@ 2025-10-05 19:16 ` Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 16/75] acpi/ghes: Cleanup the code which gets ghes ged state Michael S. Tsirkin
` (10 subsequent siblings)
11 siblings, 0 replies; 12+ messages in thread
From: Michael S. Tsirkin @ 2025-10-05 19:16 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Maydell, Mauro Carvalho Chehab, Jonathan Cameron,
Philippe Mathieu-Daudé, Igor Mammedov, Ani Sinha,
Dongjiu Geng, qemu-arm
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
The ghes_record_cper_errors() function was introduced to be used
by other types of errors, as part of the error injection
patch series. That's why it is not static.
Make it non-static again to allow its usage outside ghes.c
This reverts commit 611f3bdb20f7828b0813aa90d47d1275ef18329b.
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-ID: <14f2a888cfbf922d5f2bf94d7612114f25107d59.1758610789.git.mchehab+huawei@kernel.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
include/hw/acpi/ghes.h | 2 ++
hw/acpi/ghes.c | 6 ++++--
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
index 578a582203..39619a2457 100644
--- a/include/hw/acpi/ghes.h
+++ b/include/hw/acpi/ghes.h
@@ -75,6 +75,8 @@ void acpi_build_hest(GArray *table_data, GArray *hardware_errors,
void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
GArray *hardware_errors);
int acpi_ghes_memory_errors(uint16_t source_id, uint64_t error_physical_addr);
+void ghes_record_cper_errors(const void *cper, size_t len,
+ uint16_t source_id, Error **errp);
/**
* acpi_ghes_present: Report whether ACPI GHES table is present
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index b85bb48195..b709c177cd 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -390,8 +390,8 @@ static void get_hw_error_offsets(uint64_t ghes_addr,
*read_ack_register_addr = ghes_addr + sizeof(uint64_t);
}
-static void ghes_record_cper_errors(const void *cper, size_t len,
- uint16_t source_id, Error **errp)
+void ghes_record_cper_errors(const void *cper, size_t len,
+ uint16_t source_id, Error **errp)
{
uint64_t cper_addr = 0, read_ack_register_addr = 0, read_ack_register;
AcpiGedState *acpi_ged_state;
@@ -440,6 +440,8 @@ static void ghes_record_cper_errors(const void *cper, size_t len,
/* Write the generic error data entry into guest memory */
cpu_physical_memory_write(cper_addr, cper, len);
+
+ return;
}
int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address)
--
MST
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PULL 16/75] acpi/ghes: Cleanup the code which gets ghes ged state
[not found] <cover.1759691708.git.mst@redhat.com>
2025-10-05 19:16 ` [PULL 15/75] Revert "hw/acpi/ghes: Make ghes_record_cper_errors() static" Michael S. Tsirkin
@ 2025-10-05 19:16 ` Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 17/75] acpi/ghes: prepare to change the way HEST offsets are calculated Michael S. Tsirkin
` (9 subsequent siblings)
11 siblings, 0 replies; 12+ messages in thread
From: Michael S. Tsirkin @ 2025-10-05 19:16 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Maydell, Mauro Carvalho Chehab, Jonathan Cameron,
Igor Mammedov, Ani Sinha, Dongjiu Geng, Paolo Bonzini, qemu-arm,
kvm
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Move the check logic into a common function and simplify the
code which checks if GHES is enabled and was properly setup.
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-ID: <2bbb1d3eb88b0a668114adef2f1c2a94deebba0e.1758610789.git.mchehab+huawei@kernel.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
include/hw/acpi/ghes.h | 14 +++++++-------
hw/acpi/ghes-stub.c | 7 ++++---
hw/acpi/ghes.c | 38 +++++++++++---------------------------
target/arm/kvm.c | 7 +++++--
4 files changed, 27 insertions(+), 39 deletions(-)
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
index 39619a2457..f96ac3e85c 100644
--- a/include/hw/acpi/ghes.h
+++ b/include/hw/acpi/ghes.h
@@ -66,7 +66,6 @@ enum {
typedef struct AcpiGhesState {
uint64_t hw_error_le;
- bool present; /* True if GHES is present at all on this board */
} AcpiGhesState;
void acpi_build_hest(GArray *table_data, GArray *hardware_errors,
@@ -74,15 +73,16 @@ void acpi_build_hest(GArray *table_data, GArray *hardware_errors,
const char *oem_id, const char *oem_table_id);
void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
GArray *hardware_errors);
-int acpi_ghes_memory_errors(uint16_t source_id, uint64_t error_physical_addr);
-void ghes_record_cper_errors(const void *cper, size_t len,
+int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
+ uint64_t error_physical_addr);
+void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
uint16_t source_id, Error **errp);
/**
- * acpi_ghes_present: Report whether ACPI GHES table is present
+ * acpi_ghes_get_state: Get a pointer for ACPI ghes state
*
- * Returns: true if the system has an ACPI GHES table and it is
- * safe to call acpi_ghes_memory_errors() to record a memory error.
+ * Returns: a pointer to ghes state if the system has an ACPI GHES table,
+ * NULL, otherwise.
*/
-bool acpi_ghes_present(void);
+AcpiGhesState *acpi_ghes_get_state(void);
#endif
diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c
index 7cec1812da..40f660c246 100644
--- a/hw/acpi/ghes-stub.c
+++ b/hw/acpi/ghes-stub.c
@@ -11,12 +11,13 @@
#include "qemu/osdep.h"
#include "hw/acpi/ghes.h"
-int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address)
+int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
+ uint64_t physical_address)
{
return -1;
}
-bool acpi_ghes_present(void)
+AcpiGhesState *acpi_ghes_get_state(void)
{
- return false;
+ return NULL;
}
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index b709c177cd..84b891fd3d 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -360,18 +360,12 @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
/* Create a read-write fw_cfg file for Address */
fw_cfg_add_file_callback(s, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, NULL, NULL,
NULL, &(ags->hw_error_le), sizeof(ags->hw_error_le), false);
-
- ags->present = true;
}
static void get_hw_error_offsets(uint64_t ghes_addr,
uint64_t *cper_addr,
uint64_t *read_ack_register_addr)
{
- if (!ghes_addr) {
- return;
- }
-
/*
* non-HEST version supports only one source, so no need to change
* the start offset based on the source ID. Also, we can't validate
@@ -390,35 +384,20 @@ static void get_hw_error_offsets(uint64_t ghes_addr,
*read_ack_register_addr = ghes_addr + sizeof(uint64_t);
}
-void ghes_record_cper_errors(const void *cper, size_t len,
+void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
uint16_t source_id, Error **errp)
{
uint64_t cper_addr = 0, read_ack_register_addr = 0, read_ack_register;
- AcpiGedState *acpi_ged_state;
- AcpiGhesState *ags;
if (len > ACPI_GHES_MAX_RAW_DATA_LENGTH) {
error_setg(errp, "GHES CPER record is too big: %zd", len);
return;
}
- acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED,
- NULL));
- if (!acpi_ged_state) {
- error_setg(errp, "Can't find ACPI_GED object");
- return;
- }
- ags = &acpi_ged_state->ghes_state;
-
assert(ACPI_GHES_ERROR_SOURCE_COUNT == 1);
get_hw_error_offsets(le64_to_cpu(ags->hw_error_le),
&cper_addr, &read_ack_register_addr);
- if (!cper_addr) {
- error_setg(errp, "can not find Generic Error Status Block");
- return;
- }
-
cpu_physical_memory_read(read_ack_register_addr,
&read_ack_register, sizeof(read_ack_register));
@@ -444,7 +423,8 @@ void ghes_record_cper_errors(const void *cper, size_t len,
return;
}
-int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address)
+int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
+ uint64_t physical_address)
{
/* Memory Error Section Type */
const uint8_t guid[] =
@@ -470,7 +450,7 @@ int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address)
acpi_ghes_build_append_mem_cper(block, physical_address);
/* Report the error */
- ghes_record_cper_errors(block->data, block->len, source_id, &errp);
+ ghes_record_cper_errors(ags, block->data, block->len, source_id, &errp);
g_array_free(block, true);
@@ -482,7 +462,7 @@ int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address)
return 0;
}
-bool acpi_ghes_present(void)
+AcpiGhesState *acpi_ghes_get_state(void)
{
AcpiGedState *acpi_ged_state;
AcpiGhesState *ags;
@@ -491,8 +471,12 @@ bool acpi_ghes_present(void)
NULL));
if (!acpi_ged_state) {
- return false;
+ return NULL;
}
ags = &acpi_ged_state->ghes_state;
- return ags->present;
+
+ if (!ags->hw_error_le) {
+ return NULL;
+ }
+ return ags;
}
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index b8a1c071f5..891d4fcde9 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -2433,10 +2433,12 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
{
ram_addr_t ram_addr;
hwaddr paddr;
+ AcpiGhesState *ags;
assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
- if (acpi_ghes_present() && addr) {
+ ags = acpi_ghes_get_state();
+ if (ags && addr) {
ram_addr = qemu_ram_addr_from_host(addr);
if (ram_addr != RAM_ADDR_INVALID &&
kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
@@ -2454,7 +2456,8 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
*/
if (code == BUS_MCEERR_AR) {
kvm_cpu_synchronize_state(c);
- if (!acpi_ghes_memory_errors(ACPI_HEST_SRC_ID_SEA, paddr)) {
+ if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SEA,
+ paddr)) {
kvm_inject_arm_sea(c);
} else {
error_report("failed to record the error");
--
MST
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PULL 17/75] acpi/ghes: prepare to change the way HEST offsets are calculated
[not found] <cover.1759691708.git.mst@redhat.com>
2025-10-05 19:16 ` [PULL 15/75] Revert "hw/acpi/ghes: Make ghes_record_cper_errors() static" Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 16/75] acpi/ghes: Cleanup the code which gets ghes ged state Michael S. Tsirkin
@ 2025-10-05 19:16 ` Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 18/75] acpi/ghes: add a firmware file with HEST address Michael S. Tsirkin
` (8 subsequent siblings)
11 siblings, 0 replies; 12+ messages in thread
From: Michael S. Tsirkin @ 2025-10-05 19:16 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Maydell, Mauro Carvalho Chehab, Jonathan Cameron,
Igor Mammedov, Dongjiu Geng, Ani Sinha, Shannon Zhao, qemu-arm
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Add a new ags flag to change the way HEST offsets are calculated.
Currently, offsets needed to store ACPI HEST offsets and read ack
are calculated based on a previous knowledge from the logic
which creates the HEST table.
Such logic is not generic, not allowing to easily add more HEST
entries nor replicates what OSPM does.
As the next patches will be adding a more generic logic, add a
new use_hest_addr, set to false, in preparation for such changes.
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-ID: <f5de17bf04b27828e1a439ad396b4f7982eaf156.1758610789.git.mchehab+huawei@kernel.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
include/hw/acpi/ghes.h | 12 +++++++++++-
hw/acpi/ghes.c | 39 ++++++++++++++++++++++++---------------
hw/arm/virt-acpi-build.c | 13 ++++++++++---
3 files changed, 45 insertions(+), 19 deletions(-)
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
index f96ac3e85c..411f592662 100644
--- a/include/hw/acpi/ghes.h
+++ b/include/hw/acpi/ghes.h
@@ -64,11 +64,21 @@ enum {
ACPI_GHES_ERROR_SOURCE_COUNT
};
+/*
+ * AcpiGhesState stores GPA values that will be used to fill HEST entries.
+ *
+ * When use_hest_addr is false, the GPA of the etc/hardware_errors firmware
+ * is stored at hw_error_le. This is the default on QEMU 9.x.
+ *
+ * An GPA value equal to zero means that GHES is not present.
+ */
typedef struct AcpiGhesState {
uint64_t hw_error_le;
+ bool use_hest_addr; /* Currently, always false */
} AcpiGhesState;
-void acpi_build_hest(GArray *table_data, GArray *hardware_errors,
+void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
+ GArray *hardware_errors,
BIOSLinker *linker,
const char *oem_id, const char *oem_table_id);
void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index 84b891fd3d..9243b5ad4a 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -206,7 +206,8 @@ ghes_gen_err_data_uncorrectable_recoverable(GArray *block,
* Initialize "etc/hardware_errors" and "etc/hardware_errors_addr" fw_cfg blobs.
* See docs/specs/acpi_hest_ghes.rst for blobs format.
*/
-static void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker)
+static void build_ghes_error_table(AcpiGhesState *ags, GArray *hardware_errors,
+ BIOSLinker *linker)
{
int i, error_status_block_offset;
@@ -251,13 +252,15 @@ static void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker)
i * ACPI_GHES_MAX_RAW_DATA_LENGTH);
}
- /*
- * tell firmware to write hardware_errors GPA into
- * hardware_errors_addr fw_cfg, once the former has been initialized.
- */
- bios_linker_loader_write_pointer(linker, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, 0,
- sizeof(uint64_t),
- ACPI_HW_ERROR_FW_CFG_FILE, 0);
+ if (!ags->use_hest_addr) {
+ /*
+ * Tell firmware to write hardware_errors GPA into
+ * hardware_errors_addr fw_cfg, once the former has been initialized.
+ */
+ bios_linker_loader_write_pointer(linker, ACPI_HW_ERROR_ADDR_FW_CFG_FILE,
+ 0, sizeof(uint64_t),
+ ACPI_HW_ERROR_FW_CFG_FILE, 0);
+ }
}
/* Build Generic Hardware Error Source version 2 (GHESv2) */
@@ -331,14 +334,15 @@ static void build_ghes_v2(GArray *table_data,
}
/* Build Hardware Error Source Table */
-void acpi_build_hest(GArray *table_data, GArray *hardware_errors,
+void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
+ GArray *hardware_errors,
BIOSLinker *linker,
const char *oem_id, const char *oem_table_id)
{
AcpiTable table = { .sig = "HEST", .rev = 1,
.oem_id = oem_id, .oem_table_id = oem_table_id };
- build_ghes_error_table(hardware_errors, linker);
+ build_ghes_error_table(ags, hardware_errors, linker);
acpi_table_begin(&table, table_data);
@@ -357,9 +361,11 @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
fw_cfg_add_file(s, ACPI_HW_ERROR_FW_CFG_FILE, hardware_error->data,
hardware_error->len);
- /* Create a read-write fw_cfg file for Address */
- fw_cfg_add_file_callback(s, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, NULL, NULL,
- NULL, &(ags->hw_error_le), sizeof(ags->hw_error_le), false);
+ if (!ags->use_hest_addr) {
+ /* Create a read-write fw_cfg file for Address */
+ fw_cfg_add_file_callback(s, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, NULL, NULL,
+ NULL, &(ags->hw_error_le), sizeof(ags->hw_error_le), false);
+ }
}
static void get_hw_error_offsets(uint64_t ghes_addr,
@@ -395,8 +401,11 @@ void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
}
assert(ACPI_GHES_ERROR_SOURCE_COUNT == 1);
- get_hw_error_offsets(le64_to_cpu(ags->hw_error_le),
- &cper_addr, &read_ack_register_addr);
+
+ if (!ags->use_hest_addr) {
+ get_hw_error_offsets(le64_to_cpu(ags->hw_error_le),
+ &cper_addr, &read_ack_register_addr);
+ }
cpu_physical_memory_read(read_ack_register_addr,
&read_ack_register, sizeof(read_ack_register));
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 96830f7c4e..bbe83fab9a 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -1181,9 +1181,16 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
build_dbg2(tables_blob, tables->linker, vms);
if (vms->ras) {
- acpi_add_table(table_offsets, tables_blob);
- acpi_build_hest(tables_blob, tables->hardware_errors, tables->linker,
- vms->oem_id, vms->oem_table_id);
+ AcpiGedState *acpi_ged_state;
+ AcpiGhesState *ags;
+
+ acpi_ged_state = ACPI_GED(vms->acpi_dev);
+ ags = &acpi_ged_state->ghes_state;
+ if (ags) {
+ acpi_add_table(table_offsets, tables_blob);
+ acpi_build_hest(ags, tables_blob, tables->hardware_errors,
+ tables->linker, vms->oem_id, vms->oem_table_id);
+ }
}
if (ms->numa_state->num_nodes > 0) {
--
MST
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PULL 18/75] acpi/ghes: add a firmware file with HEST address
[not found] <cover.1759691708.git.mst@redhat.com>
` (2 preceding siblings ...)
2025-10-05 19:16 ` [PULL 17/75] acpi/ghes: prepare to change the way HEST offsets are calculated Michael S. Tsirkin
@ 2025-10-05 19:16 ` Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 19/75] acpi/ghes: Use HEST table offsets when preparing GHES records Michael S. Tsirkin
` (7 subsequent siblings)
11 siblings, 0 replies; 12+ messages in thread
From: Michael S. Tsirkin @ 2025-10-05 19:16 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Maydell, Mauro Carvalho Chehab, Jonathan Cameron,
Igor Mammedov, Dongjiu Geng, Ani Sinha, qemu-arm
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Store HEST table address at GPA, placing its the start of the table at
hest_addr_le variable.
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-ID: <c29aa5e6ab9b2d93dd5328481630c3b03da86261.1758610789.git.mchehab+huawei@kernel.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
include/hw/acpi/ghes.h | 6 +++++-
hw/acpi/ghes.c | 22 ++++++++++++++++++++--
2 files changed, 25 insertions(+), 3 deletions(-)
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
index 411f592662..ea9baab764 100644
--- a/include/hw/acpi/ghes.h
+++ b/include/hw/acpi/ghes.h
@@ -70,9 +70,13 @@ enum {
* When use_hest_addr is false, the GPA of the etc/hardware_errors firmware
* is stored at hw_error_le. This is the default on QEMU 9.x.
*
- * An GPA value equal to zero means that GHES is not present.
+ * When use_hest_addr is true, the GPA of the HEST table is stored at
+ * hest_addr_le. This is the default for QEMU 10.x and above.
+ *
+ * Whe both GPA values are equal to zero means that GHES is not present.
*/
typedef struct AcpiGhesState {
+ uint64_t hest_addr_le;
uint64_t hw_error_le;
bool use_hest_addr; /* Currently, always false */
} AcpiGhesState;
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index 9243b5ad4a..cbc96c1909 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -30,6 +30,7 @@
#define ACPI_HW_ERROR_FW_CFG_FILE "etc/hardware_errors"
#define ACPI_HW_ERROR_ADDR_FW_CFG_FILE "etc/hardware_errors_addr"
+#define ACPI_HEST_ADDR_FW_CFG_FILE "etc/acpi_table_hest_addr"
/* The max size in bytes for one error block */
#define ACPI_GHES_MAX_RAW_DATA_LENGTH (1 * KiB)
@@ -341,6 +342,9 @@ void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
{
AcpiTable table = { .sig = "HEST", .rev = 1,
.oem_id = oem_id, .oem_table_id = oem_table_id };
+ uint32_t hest_offset;
+
+ hest_offset = table_data->len;
build_ghes_error_table(ags, hardware_errors, linker);
@@ -352,6 +356,17 @@ void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
ACPI_GHES_NOTIFY_SEA, ACPI_HEST_SRC_ID_SEA);
acpi_table_end(linker, &table);
+
+ if (ags->use_hest_addr) {
+ /*
+ * Tell firmware to write into GPA the address of HEST via fw_cfg,
+ * once initialized.
+ */
+ bios_linker_loader_write_pointer(linker,
+ ACPI_HEST_ADDR_FW_CFG_FILE, 0,
+ sizeof(uint64_t),
+ ACPI_BUILD_TABLE_FILE, hest_offset);
+ }
}
void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
@@ -361,7 +376,10 @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
fw_cfg_add_file(s, ACPI_HW_ERROR_FW_CFG_FILE, hardware_error->data,
hardware_error->len);
- if (!ags->use_hest_addr) {
+ if (ags->use_hest_addr) {
+ fw_cfg_add_file_callback(s, ACPI_HEST_ADDR_FW_CFG_FILE, NULL, NULL,
+ NULL, &(ags->hest_addr_le), sizeof(ags->hest_addr_le), false);
+ } else {
/* Create a read-write fw_cfg file for Address */
fw_cfg_add_file_callback(s, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, NULL, NULL,
NULL, &(ags->hw_error_le), sizeof(ags->hw_error_le), false);
@@ -484,7 +502,7 @@ AcpiGhesState *acpi_ghes_get_state(void)
}
ags = &acpi_ged_state->ghes_state;
- if (!ags->hw_error_le) {
+ if (!ags->hw_error_le && !ags->hest_addr_le) {
return NULL;
}
return ags;
--
MST
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PULL 19/75] acpi/ghes: Use HEST table offsets when preparing GHES records
[not found] <cover.1759691708.git.mst@redhat.com>
` (3 preceding siblings ...)
2025-10-05 19:16 ` [PULL 18/75] acpi/ghes: add a firmware file with HEST address Michael S. Tsirkin
@ 2025-10-05 19:16 ` Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 20/75] acpi/ghes: don't hard-code the number of sources for HEST table Michael S. Tsirkin
` (6 subsequent siblings)
11 siblings, 0 replies; 12+ messages in thread
From: Michael S. Tsirkin @ 2025-10-05 19:16 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Maydell, Mauro Carvalho Chehab, Jonathan Cameron,
Igor Mammedov, Dongjiu Geng, Ani Sinha, qemu-arm
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
There are two pointers that are needed during error injection:
1. The start address of the CPER block to be stored;
2. The address of the read ack.
It is preferable to calculate them from the HEST table. This allows
checking the source ID, the size of the table and the type of the
HEST error block structures.
Yet, keep the old code, as this is needed for migration purposes
from older QEMU versions.
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-ID: <d4344e8dbe66372e1e093d968eda2e8b0527ba48.1758610789.git.mchehab+huawei@kernel.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
include/hw/acpi/ghes.h | 2 +-
hw/acpi/ghes.c | 100 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 101 insertions(+), 1 deletion(-)
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
index ea9baab764..5265102ba5 100644
--- a/include/hw/acpi/ghes.h
+++ b/include/hw/acpi/ghes.h
@@ -78,7 +78,7 @@ enum {
typedef struct AcpiGhesState {
uint64_t hest_addr_le;
uint64_t hw_error_le;
- bool use_hest_addr; /* Currently, always false */
+ bool use_hest_addr; /* True if HEST address is present */
} AcpiGhesState;
void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index cbc96c1909..668ca72587 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -41,6 +41,12 @@
/* Address offset in Generic Address Structure(GAS) */
#define GAS_ADDR_OFFSET 4
+/*
+ * ACPI spec 1.0b
+ * 5.2.3 System Description Table Header
+ */
+#define ACPI_DESC_HEADER_OFFSET 36
+
/*
* The total size of Generic Error Data Entry
* ACPI 6.1/6.2: 18.3.2.7.1 Generic Error Data,
@@ -61,6 +67,30 @@
*/
#define ACPI_GHES_GESB_SIZE 20
+/*
+ * See the memory layout map at docs/specs/acpi_hest_ghes.rst.
+ */
+
+/*
+ * ACPI 6.1: 18.3.2.8 Generic Hardware Error Source version 2
+ * Table 18-344 Generic Hardware Error Source version 2 (GHESv2) Structure
+ */
+#define HEST_GHES_V2_ENTRY_SIZE 92
+
+/*
+ * ACPI 6.1: 18.3.2.8 Generic Hardware Error Source version 2
+ * Table 18-344 Generic Hardware Error Source version 2 (GHESv2) Structure
+ * Read Ack Register
+ */
+#define GHES_READ_ACK_ADDR_OFF 64
+
+/*
+ * ACPI 6.1: 18.3.2.7: Generic Hardware Error Source
+ * Table 18-341 Generic Hardware Error Source Structure
+ * Error Status Address
+ */
+#define GHES_ERR_STATUS_ADDR_OFF 20
+
/*
* Values for error_severity field
*/
@@ -408,6 +438,73 @@ static void get_hw_error_offsets(uint64_t ghes_addr,
*read_ack_register_addr = ghes_addr + sizeof(uint64_t);
}
+static void get_ghes_source_offsets(uint16_t source_id,
+ uint64_t hest_addr,
+ uint64_t *cper_addr,
+ uint64_t *read_ack_start_addr,
+ Error **errp)
+{
+ uint64_t hest_err_block_addr, hest_read_ack_addr;
+ uint64_t err_source_entry, error_block_addr;
+ uint32_t num_sources, i;
+
+ hest_addr += ACPI_DESC_HEADER_OFFSET;
+
+ cpu_physical_memory_read(hest_addr, &num_sources,
+ sizeof(num_sources));
+ num_sources = le32_to_cpu(num_sources);
+
+ err_source_entry = hest_addr + sizeof(num_sources);
+
+ /*
+ * Currently, HEST Error source navigates only for GHESv2 tables
+ */
+ for (i = 0; i < num_sources; i++) {
+ uint64_t addr = err_source_entry;
+ uint16_t type, src_id;
+
+ cpu_physical_memory_read(addr, &type, sizeof(type));
+ type = le16_to_cpu(type);
+
+ /* For now, we only know the size of GHESv2 table */
+ if (type != ACPI_GHES_SOURCE_GENERIC_ERROR_V2) {
+ error_setg(errp, "HEST: type %d not supported.", type);
+ return;
+ }
+
+ /* Compare CPER source ID at the GHESv2 structure */
+ addr += sizeof(type);
+ cpu_physical_memory_read(addr, &src_id, sizeof(src_id));
+ if (le16_to_cpu(src_id) == source_id) {
+ break;
+ }
+
+ err_source_entry += HEST_GHES_V2_ENTRY_SIZE;
+ }
+ if (i == num_sources) {
+ error_setg(errp, "HEST: Source %d not found.", source_id);
+ return;
+ }
+
+ /* Navigate through table address pointers */
+ hest_err_block_addr = err_source_entry + GHES_ERR_STATUS_ADDR_OFF +
+ GAS_ADDR_OFFSET;
+
+ cpu_physical_memory_read(hest_err_block_addr, &error_block_addr,
+ sizeof(error_block_addr));
+ error_block_addr = le64_to_cpu(error_block_addr);
+
+ cpu_physical_memory_read(error_block_addr, cper_addr,
+ sizeof(*cper_addr));
+ *cper_addr = le64_to_cpu(*cper_addr);
+
+ hest_read_ack_addr = err_source_entry + GHES_READ_ACK_ADDR_OFF +
+ GAS_ADDR_OFFSET;
+ cpu_physical_memory_read(hest_read_ack_addr, read_ack_start_addr,
+ sizeof(*read_ack_start_addr));
+ *read_ack_start_addr = le64_to_cpu(*read_ack_start_addr);
+}
+
void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
uint16_t source_id, Error **errp)
{
@@ -423,6 +520,9 @@ void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
if (!ags->use_hest_addr) {
get_hw_error_offsets(le64_to_cpu(ags->hw_error_le),
&cper_addr, &read_ack_register_addr);
+ } else {
+ get_ghes_source_offsets(source_id, le64_to_cpu(ags->hest_addr_le),
+ &cper_addr, &read_ack_register_addr, errp);
}
cpu_physical_memory_read(read_ack_register_addr,
--
MST
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PULL 20/75] acpi/ghes: don't hard-code the number of sources for HEST table
[not found] <cover.1759691708.git.mst@redhat.com>
` (4 preceding siblings ...)
2025-10-05 19:16 ` [PULL 19/75] acpi/ghes: Use HEST table offsets when preparing GHES records Michael S. Tsirkin
@ 2025-10-05 19:16 ` Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 21/75] acpi/ghes: add a notifier to notify when error data is ready Michael S. Tsirkin
` (5 subsequent siblings)
11 siblings, 0 replies; 12+ messages in thread
From: Michael S. Tsirkin @ 2025-10-05 19:16 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Maydell, Mauro Carvalho Chehab, Jonathan Cameron,
Igor Mammedov, Dongjiu Geng, Ani Sinha, Shannon Zhao,
Paolo Bonzini, qemu-arm, kvm
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
The current code is actually dependent on having just one error
structure with a single source, as any change there would cause
migration issues.
As the number of sources should be arch-dependent, as it will depend on
what kind of notifications will exist, and how many errors can be
reported at the same time, change the logic to be more flexible,
allowing the number of sources to be defined when building the
HEST table by the caller.
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-ID: <1698680848c11d6f26368426f1657e14faaf55c4.1758610789.git.mchehab+huawei@kernel.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
include/hw/acpi/ghes.h | 17 ++++++++++++-----
hw/acpi/ghes.c | 39 +++++++++++++++++++++------------------
hw/arm/virt-acpi-build.c | 8 +++++++-
target/arm/kvm.c | 2 +-
4 files changed, 41 insertions(+), 25 deletions(-)
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
index 5265102ba5..8c4b084337 100644
--- a/include/hw/acpi/ghes.h
+++ b/include/hw/acpi/ghes.h
@@ -57,13 +57,18 @@ enum AcpiGhesNotifyType {
ACPI_GHES_NOTIFY_RESERVED = 12
};
-enum {
- ACPI_HEST_SRC_ID_SEA = 0,
- /* future ids go here */
-
- ACPI_GHES_ERROR_SOURCE_COUNT
+/*
+ * ID numbers used to fill HEST source ID field
+ */
+enum AcpiGhesSourceID {
+ ACPI_HEST_SRC_ID_SYNC,
};
+typedef struct AcpiNotificationSourceId {
+ enum AcpiGhesSourceID source_id;
+ enum AcpiGhesNotifyType notify;
+} AcpiNotificationSourceId;
+
/*
* AcpiGhesState stores GPA values that will be used to fill HEST entries.
*
@@ -84,6 +89,8 @@ typedef struct AcpiGhesState {
void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
GArray *hardware_errors,
BIOSLinker *linker,
+ const AcpiNotificationSourceId * const notif_source,
+ int num_sources,
const char *oem_id, const char *oem_table_id);
void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
GArray *hardware_errors);
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index 668ca72587..f49d0d628f 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -238,17 +238,17 @@ ghes_gen_err_data_uncorrectable_recoverable(GArray *block,
* See docs/specs/acpi_hest_ghes.rst for blobs format.
*/
static void build_ghes_error_table(AcpiGhesState *ags, GArray *hardware_errors,
- BIOSLinker *linker)
+ BIOSLinker *linker, int num_sources)
{
int i, error_status_block_offset;
/* Build error_block_address */
- for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
+ for (i = 0; i < num_sources; i++) {
build_append_int_noprefix(hardware_errors, 0, sizeof(uint64_t));
}
/* Build read_ack_register */
- for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
+ for (i = 0; i < num_sources; i++) {
/*
* Initialize the value of read_ack_register to 1, so GHES can be
* writable after (re)boot.
@@ -263,13 +263,13 @@ static void build_ghes_error_table(AcpiGhesState *ags, GArray *hardware_errors,
/* Reserve space for Error Status Data Block */
acpi_data_push(hardware_errors,
- ACPI_GHES_MAX_RAW_DATA_LENGTH * ACPI_GHES_ERROR_SOURCE_COUNT);
+ ACPI_GHES_MAX_RAW_DATA_LENGTH * num_sources);
/* Tell guest firmware to place hardware_errors blob into RAM */
bios_linker_loader_alloc(linker, ACPI_HW_ERROR_FW_CFG_FILE,
hardware_errors, sizeof(uint64_t), false);
- for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
+ for (i = 0; i < num_sources; i++) {
/*
* Tell firmware to patch error_block_address entries to point to
* corresponding "Generic Error Status Block"
@@ -295,12 +295,14 @@ static void build_ghes_error_table(AcpiGhesState *ags, GArray *hardware_errors,
}
/* Build Generic Hardware Error Source version 2 (GHESv2) */
-static void build_ghes_v2(GArray *table_data,
- BIOSLinker *linker,
- enum AcpiGhesNotifyType notify,
- uint16_t source_id)
+static void build_ghes_v2_entry(GArray *table_data,
+ BIOSLinker *linker,
+ const AcpiNotificationSourceId *notif_src,
+ uint16_t index, int num_sources)
{
uint64_t address_offset;
+ const uint16_t notify = notif_src->notify;
+ const uint16_t source_id = notif_src->source_id;
/*
* Type:
@@ -331,7 +333,7 @@ static void build_ghes_v2(GArray *table_data,
address_offset + GAS_ADDR_OFFSET,
sizeof(uint64_t),
ACPI_HW_ERROR_FW_CFG_FILE,
- source_id * sizeof(uint64_t));
+ index * sizeof(uint64_t));
/* Notification Structure */
build_ghes_hw_error_notification(table_data, notify);
@@ -351,8 +353,7 @@ static void build_ghes_v2(GArray *table_data,
address_offset + GAS_ADDR_OFFSET,
sizeof(uint64_t),
ACPI_HW_ERROR_FW_CFG_FILE,
- (ACPI_GHES_ERROR_SOURCE_COUNT + source_id)
- * sizeof(uint64_t));
+ (num_sources + index) * sizeof(uint64_t));
/*
* Read Ack Preserve field
@@ -368,22 +369,26 @@ static void build_ghes_v2(GArray *table_data,
void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
GArray *hardware_errors,
BIOSLinker *linker,
+ const AcpiNotificationSourceId *notif_source,
+ int num_sources,
const char *oem_id, const char *oem_table_id)
{
AcpiTable table = { .sig = "HEST", .rev = 1,
.oem_id = oem_id, .oem_table_id = oem_table_id };
uint32_t hest_offset;
+ int i;
hest_offset = table_data->len;
- build_ghes_error_table(ags, hardware_errors, linker);
+ build_ghes_error_table(ags, hardware_errors, linker, num_sources);
acpi_table_begin(&table, table_data);
/* Error Source Count */
- build_append_int_noprefix(table_data, ACPI_GHES_ERROR_SOURCE_COUNT, 4);
- build_ghes_v2(table_data, linker,
- ACPI_GHES_NOTIFY_SEA, ACPI_HEST_SRC_ID_SEA);
+ build_append_int_noprefix(table_data, num_sources, 4);
+ for (i = 0; i < num_sources; i++) {
+ build_ghes_v2_entry(table_data, linker, ¬if_source[i], i, num_sources);
+ }
acpi_table_end(linker, &table);
@@ -515,8 +520,6 @@ void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
return;
}
- assert(ACPI_GHES_ERROR_SOURCE_COUNT == 1);
-
if (!ags->use_hest_addr) {
get_hw_error_offsets(le64_to_cpu(ags->hw_error_le),
&cper_addr, &read_ack_register_addr);
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index bbe83fab9a..c856d293c6 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -1125,6 +1125,10 @@ static void acpi_align_size(GArray *blob, unsigned align)
g_array_set_size(blob, ROUND_UP(acpi_data_len(blob), align));
}
+static const AcpiNotificationSourceId hest_ghes_notify[] = {
+ { ACPI_HEST_SRC_ID_SYNC, ACPI_GHES_NOTIFY_SEA },
+};
+
static
void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
{
@@ -1189,7 +1193,9 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
if (ags) {
acpi_add_table(table_offsets, tables_blob);
acpi_build_hest(ags, tables_blob, tables->hardware_errors,
- tables->linker, vms->oem_id, vms->oem_table_id);
+ tables->linker, hest_ghes_notify,
+ ARRAY_SIZE(hest_ghes_notify),
+ vms->oem_id, vms->oem_table_id);
}
}
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 891d4fcde9..4f769d69b3 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -2456,7 +2456,7 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
*/
if (code == BUS_MCEERR_AR) {
kvm_cpu_synchronize_state(c);
- if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SEA,
+ if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
paddr)) {
kvm_inject_arm_sea(c);
} else {
--
MST
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PULL 21/75] acpi/ghes: add a notifier to notify when error data is ready
[not found] <cover.1759691708.git.mst@redhat.com>
` (5 preceding siblings ...)
2025-10-05 19:16 ` [PULL 20/75] acpi/ghes: don't hard-code the number of sources for HEST table Michael S. Tsirkin
@ 2025-10-05 19:16 ` Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 23/75] acpi/generic_event_device: add logic to detect if HEST addr is available Michael S. Tsirkin
` (4 subsequent siblings)
11 siblings, 0 replies; 12+ messages in thread
From: Michael S. Tsirkin @ 2025-10-05 19:16 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Maydell, Mauro Carvalho Chehab, Jonathan Cameron,
Igor Mammedov, Dongjiu Geng, Ani Sinha, qemu-arm
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Some error injection notify methods are async, like GPIO
notify. Add a notifier to be used when the error record is
ready to be sent to the guest OS.
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-ID: <edf9c6e5b80dc57e3443893bf9e1eb25cb9d266b.1758610789.git.mchehab+huawei@kernel.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
include/hw/acpi/ghes.h | 3 +++
hw/acpi/ghes.c | 5 ++++-
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
index 8c4b084337..390943e46d 100644
--- a/include/hw/acpi/ghes.h
+++ b/include/hw/acpi/ghes.h
@@ -24,6 +24,9 @@
#include "hw/acpi/bios-linker-loader.h"
#include "qapi/error.h"
+#include "qemu/notify.h"
+
+extern NotifierList acpi_generic_error_notifiers;
/*
* Values for Hardware Error Notification Type field
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index f49d0d628f..d666f1b10b 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -510,6 +510,9 @@ static void get_ghes_source_offsets(uint16_t source_id,
*read_ack_start_addr = le64_to_cpu(*read_ack_start_addr);
}
+NotifierList acpi_generic_error_notifiers =
+ NOTIFIER_LIST_INITIALIZER(acpi_generic_error_notifiers);
+
void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
uint16_t source_id, Error **errp)
{
@@ -550,7 +553,7 @@ void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
/* Write the generic error data entry into guest memory */
cpu_physical_memory_write(cper_addr, cper, len);
- return;
+ notifier_list_notify(&acpi_generic_error_notifiers, NULL);
}
int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
--
MST
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PULL 23/75] acpi/generic_event_device: add logic to detect if HEST addr is available
[not found] <cover.1759691708.git.mst@redhat.com>
` (6 preceding siblings ...)
2025-10-05 19:16 ` [PULL 21/75] acpi/ghes: add a notifier to notify when error data is ready Michael S. Tsirkin
@ 2025-10-05 19:16 ` Michael S. Tsirkin
2025-10-05 19:16 ` [PULL 26/75] arm/virt: Wire up a GED error device for ACPI / GHES Michael S. Tsirkin
` (3 subsequent siblings)
11 siblings, 0 replies; 12+ messages in thread
From: Michael S. Tsirkin @ 2025-10-05 19:16 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Maydell, Mauro Carvalho Chehab, Jonathan Cameron,
Igor Mammedov, Ani Sinha, Shannon Zhao, Eduardo Habkost,
Marcel Apfelbaum, Philippe Mathieu-Daudé, Yanan Wang,
Zhao Liu, qemu-arm
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Create a new property (x-has-hest-addr) and use it to detect if
the GHES table offsets can be calculated from the HEST address
(qemu 10.0 and upper) or via the legacy way via an offset obtained
from the hardware_errors firmware file.
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-ID: <c4eb3cf32a3f158ae62dac29e866ac3f373956c3.1758610789.git.mchehab+huawei@kernel.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/acpi/generic_event_device.c | 2 ++
hw/arm/virt-acpi-build.c | 18 ++++++++++++++++--
hw/core/machine.c | 5 ++++-
3 files changed, 22 insertions(+), 3 deletions(-)
diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index 55998303c2..b1ff6ab74d 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -349,6 +349,8 @@ static const Property acpi_ged_properties[] = {
pcihp_state.use_acpi_hotplug_bridge, 0),
DEFINE_PROP_LINK("bus", AcpiGedState, pcihp_state.root,
TYPE_PCI_BUS, PCIBus *),
+ DEFINE_PROP_BOOL("x-has-hest-addr", AcpiGedState,
+ ghes_state.use_hest_addr, false),
};
static const VMStateDescription vmstate_memhp_state = {
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index c856d293c6..ff3b7a794b 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -1129,6 +1129,10 @@ static const AcpiNotificationSourceId hest_ghes_notify[] = {
{ ACPI_HEST_SRC_ID_SYNC, ACPI_GHES_NOTIFY_SEA },
};
+static const AcpiNotificationSourceId hest_ghes_notify_10_0[] = {
+ { ACPI_HEST_SRC_ID_SYNC, ACPI_GHES_NOTIFY_SEA },
+};
+
static
void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
{
@@ -1186,15 +1190,25 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
if (vms->ras) {
AcpiGedState *acpi_ged_state;
+ static const AcpiNotificationSourceId *notify;
+ unsigned int notify_sz;
AcpiGhesState *ags;
acpi_ged_state = ACPI_GED(vms->acpi_dev);
ags = &acpi_ged_state->ghes_state;
if (ags) {
acpi_add_table(table_offsets, tables_blob);
+
+ if (!ags->use_hest_addr) {
+ notify = hest_ghes_notify_10_0;
+ notify_sz = ARRAY_SIZE(hest_ghes_notify_10_0);
+ } else {
+ notify = hest_ghes_notify;
+ notify_sz = ARRAY_SIZE(hest_ghes_notify);
+ }
+
acpi_build_hest(ags, tables_blob, tables->hardware_errors,
- tables->linker, hest_ghes_notify,
- ARRAY_SIZE(hest_ghes_notify),
+ tables->linker, notify, notify_sz,
vms->oem_id, vms->oem_table_id);
}
}
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 38c949c4f2..7b7a381b0a 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -35,9 +35,12 @@
#include "hw/virtio/virtio-pci.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/virtio-iommu.h"
+#include "hw/acpi/generic_event_device.h"
#include "audio/audio.h"
-GlobalProperty hw_compat_10_1[] = {};
+GlobalProperty hw_compat_10_1[] = {
+ { TYPE_ACPI_GED, "x-has-hest-addr", "false" },
+};
const size_t hw_compat_10_1_len = G_N_ELEMENTS(hw_compat_10_1);
GlobalProperty hw_compat_10_0[] = {
--
MST
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PULL 26/75] arm/virt: Wire up a GED error device for ACPI / GHES
[not found] <cover.1759691708.git.mst@redhat.com>
` (7 preceding siblings ...)
2025-10-05 19:16 ` [PULL 23/75] acpi/generic_event_device: add logic to detect if HEST addr is available Michael S. Tsirkin
@ 2025-10-05 19:16 ` Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 27/75] qapi/acpi-hest: add an interface to do generic CPER error injection Michael S. Tsirkin
` (2 subsequent siblings)
11 siblings, 0 replies; 12+ messages in thread
From: Michael S. Tsirkin @ 2025-10-05 19:16 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Maydell, Mauro Carvalho Chehab, Jonathan Cameron,
Igor Mammedov, Ani Sinha, Shannon Zhao, qemu-arm
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Adds support to ARM virtualization to allow handling
generic error ACPI Event via GED & error source device.
It is aligned with Linux Kernel patch:
https://lore.kernel.org/lkml/1272350481-27951-8-git-send-email-ying.huang@intel.com/
Co-authored-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Co-authored-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-ID: <3237a76b1469d669436399495825348bf34122cd.1758610789.git.mchehab+huawei@kernel.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
include/hw/arm/virt.h | 1 +
hw/arm/virt-acpi-build.c | 1 +
hw/arm/virt.c | 12 +++++++++++-
3 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index ea2cff05b0..e14ea0f9d4 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -174,6 +174,7 @@ struct VirtMachineState {
DeviceState *gic;
DeviceState *acpi_dev;
Notifier powerdown_notifier;
+ Notifier generic_error_notifier;
PCIBus *bus;
char *oem_id;
char *oem_table_id;
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index ff3b7a794b..2b63008df0 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -1066,6 +1066,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
}
acpi_dsdt_add_power_button(scope);
+ aml_append(scope, aml_error_device());
#ifdef CONFIG_TPM
acpi_dsdt_add_tpm(scope, vms);
#endif
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 02209fadcf..6960f6113f 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -693,7 +693,7 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms)
MachineState *ms = MACHINE(vms);
SysBusDevice *sbdev;
int irq = vms->irqmap[VIRT_ACPI_GED];
- uint32_t event = ACPI_GED_PWR_DOWN_EVT;
+ uint32_t event = ACPI_GED_PWR_DOWN_EVT | ACPI_GED_ERROR_EVT;
bool acpi_pcihp;
if (ms->ram_slots) {
@@ -1050,6 +1050,13 @@ static void virt_powerdown_req(Notifier *n, void *opaque)
}
}
+static void virt_generic_error_req(Notifier *n, void *opaque)
+{
+ VirtMachineState *s = container_of(n, VirtMachineState, generic_error_notifier);
+
+ acpi_send_event(s->acpi_dev, ACPI_GENERIC_ERROR);
+}
+
static void create_gpio_keys(char *fdt, DeviceState *pl061_dev,
uint32_t phandle)
{
@@ -2500,6 +2507,9 @@ static void machvirt_init(MachineState *machine)
if (has_ged && aarch64 && firmware_loaded && virt_is_acpi_enabled(vms)) {
vms->acpi_dev = create_acpi_ged(vms);
+ vms->generic_error_notifier.notify = virt_generic_error_req;
+ notifier_list_add(&acpi_generic_error_notifiers,
+ &vms->generic_error_notifier);
} else {
create_gpio_devices(vms, VIRT_GPIO, sysmem);
}
--
MST
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PULL 27/75] qapi/acpi-hest: add an interface to do generic CPER error injection
[not found] <cover.1759691708.git.mst@redhat.com>
` (8 preceding siblings ...)
2025-10-05 19:16 ` [PULL 26/75] arm/virt: Wire up a GED error device for ACPI / GHES Michael S. Tsirkin
@ 2025-10-05 19:17 ` Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 30/75] docs: hest: add new "etc/acpi_table_hest_addr" and update workflow Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 36/75] smbios: cap DIMM size to 2Tb as workaround for broken Windows Michael S. Tsirkin
11 siblings, 0 replies; 12+ messages in thread
From: Michael S. Tsirkin @ 2025-10-05 19:17 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Maydell, Mauro Carvalho Chehab, Jonathan Cameron,
Shiju Jose, Igor Mammedov, Markus Armbruster, Ani Sinha,
Paolo Bonzini, Dongjiu Geng, Shannon Zhao, Eric Blake,
Michael Roth, qemu-arm
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Create a QMP command to be used for generic ACPI APEI hardware error
injection (HEST) via GHESv2, and add support for it for ARM guests.
Error injection uses ACPI_HEST_SRC_ID_QMP source ID to be platform
independent. This is mapped at arch virt bindings, depending on the
types supported by QEMU and by the BIOS. So, on ARM, this is supported
via ACPI_GHES_NOTIFY_GPIO notification type.
This patch was co-authored:
- original ghes logic to inject a simple ARM record by Shiju Jose;
- generic logic to handle block addresses by Jonathan Cameron;
- generic GHESv2 error inject by Mauro Carvalho Chehab;
Co-authored-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Co-authored-by: Shiju Jose <shiju.jose@huawei.com>
Co-authored-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Igor Mammedov <imammedo@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-ID: <81e2118b3c8b7e5da341817f277d61251655e0db.1758610789.git.mchehab+huawei@kernel.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
qapi/acpi-hest.json | 36 ++++++++++++++++++++++++++++++++++++
qapi/qapi-schema.json | 1 +
include/hw/acpi/ghes.h | 1 +
include/hw/arm/virt.h | 1 +
hw/acpi/ghes.c | 2 +-
hw/acpi/ghes_cper.c | 40 ++++++++++++++++++++++++++++++++++++++++
hw/acpi/ghes_cper_stub.c | 20 ++++++++++++++++++++
hw/arm/virt-acpi-build.c | 1 +
hw/arm/virt.c | 7 +++++++
MAINTAINERS | 7 +++++++
hw/acpi/Kconfig | 5 +++++
hw/acpi/meson.build | 2 ++
qapi/meson.build | 1 +
13 files changed, 123 insertions(+), 1 deletion(-)
create mode 100644 qapi/acpi-hest.json
create mode 100644 hw/acpi/ghes_cper.c
create mode 100644 hw/acpi/ghes_cper_stub.c
diff --git a/qapi/acpi-hest.json b/qapi/acpi-hest.json
new file mode 100644
index 0000000000..28af1266a7
--- /dev/null
+++ b/qapi/acpi-hest.json
@@ -0,0 +1,36 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+##
+# == GHESv2 CPER Error Injection
+#
+# Defined since ACPI Specification 6.1,
+# section 18.3.2.8 Generic Hardware Error Source version 2. See:
+#
+# https://uefi.org/sites/default/files/resources/ACPI_6_1.pdf
+##
+
+
+##
+# @inject-ghes-v2-error:
+#
+# Inject an error with additional ACPI 6.1 GHESv2 error information
+#
+# @cper: contains a base64 encoded string with raw data for a single
+# CPER record with Generic Error Status Block, Generic Error Data
+# Entry and generic error data payload, as described at
+# https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#format
+#
+# Features:
+#
+# @unstable: This command is experimental.
+#
+# Since: 10.2
+##
+{ 'command': 'inject-ghes-v2-error',
+ 'data': {
+ 'cper': 'str'
+ },
+ 'features': [ 'unstable' ]
+}
diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
index 82f111ba06..b93dd68d94 100644
--- a/qapi/qapi-schema.json
+++ b/qapi/qapi-schema.json
@@ -68,6 +68,7 @@
{ 'include': 'misc-i386.json' }
{ 'include': 'audio.json' }
{ 'include': 'acpi.json' }
+{ 'include': 'acpi-hest.json' }
{ 'include': 'pci.json' }
{ 'include': 'stats.json' }
{ 'include': 'virtio.json' }
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
index 390943e46d..df2ecbf6e4 100644
--- a/include/hw/acpi/ghes.h
+++ b/include/hw/acpi/ghes.h
@@ -65,6 +65,7 @@ enum AcpiGhesNotifyType {
*/
enum AcpiGhesSourceID {
ACPI_HEST_SRC_ID_SYNC,
+ ACPI_HEST_SRC_ID_QMP, /* Use it only for QMP injected errors */
};
typedef struct AcpiNotificationSourceId {
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index e14ea0f9d4..04a09af354 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -33,6 +33,7 @@
#include "exec/hwaddr.h"
#include "qemu/notify.h"
#include "hw/boards.h"
+#include "hw/acpi/ghes.h"
#include "hw/arm/boot.h"
#include "hw/arm/bsa.h"
#include "hw/block/flash.h"
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index d666f1b10b..06555905ce 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -553,7 +553,7 @@ void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
/* Write the generic error data entry into guest memory */
cpu_physical_memory_write(cper_addr, cper, len);
- notifier_list_notify(&acpi_generic_error_notifiers, NULL);
+ notifier_list_notify(&acpi_generic_error_notifiers, &source_id);
}
int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
diff --git a/hw/acpi/ghes_cper.c b/hw/acpi/ghes_cper.c
new file mode 100644
index 0000000000..31cb2ffabe
--- /dev/null
+++ b/hw/acpi/ghes_cper.c
@@ -0,0 +1,40 @@
+/*
+ * CPER payload parser for error injection
+ *
+ * Copyright(C) 2024-2025 Huawei LTD.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+
+#include "qemu/base64.h"
+#include "qemu/error-report.h"
+#include "qemu/uuid.h"
+#include "qapi/qapi-commands-acpi-hest.h"
+#include "hw/acpi/ghes.h"
+
+void qmp_inject_ghes_v2_error(const char *qmp_cper, Error **errp)
+{
+ AcpiGhesState *ags;
+ uint8_t *cper;
+ size_t len;
+
+ ags = acpi_ghes_get_state();
+ if (!ags) {
+ return;
+ }
+
+ cper = qbase64_decode(qmp_cper, -1, &len, errp);
+ if (!cper) {
+ error_setg(errp, "missing GHES CPER payload");
+ return;
+ }
+
+ ghes_record_cper_errors(ags, cper, len, ACPI_HEST_SRC_ID_QMP, errp);
+
+ g_free(cper);
+}
diff --git a/hw/acpi/ghes_cper_stub.c b/hw/acpi/ghes_cper_stub.c
new file mode 100644
index 0000000000..b16be73502
--- /dev/null
+++ b/hw/acpi/ghes_cper_stub.c
@@ -0,0 +1,20 @@
+/*
+ * Stub interface for CPER payload parser for error injection
+ *
+ * Copyright(C) 2024-2025 Huawei LTD.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-acpi-hest.h"
+#include "hw/acpi/ghes.h"
+
+void qmp_inject_ghes_v2_error(const char *cper, Error **errp)
+{
+ error_setg(errp, "GHES QMP error inject is not compiled in");
+}
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 2b63008df0..8bb6b60515 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -1128,6 +1128,7 @@ static void acpi_align_size(GArray *blob, unsigned align)
static const AcpiNotificationSourceId hest_ghes_notify[] = {
{ ACPI_HEST_SRC_ID_SYNC, ACPI_GHES_NOTIFY_SEA },
+ { ACPI_HEST_SRC_ID_QMP, ACPI_GHES_NOTIFY_GPIO },
};
static const AcpiNotificationSourceId hest_ghes_notify_10_0[] = {
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 6960f6113f..aad557be1a 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1052,6 +1052,13 @@ static void virt_powerdown_req(Notifier *n, void *opaque)
static void virt_generic_error_req(Notifier *n, void *opaque)
{
+ uint16_t *source_id = opaque;
+
+ /* Currently, only QMP source ID is async */
+ if (*source_id != ACPI_HEST_SRC_ID_QMP) {
+ return;
+ }
+
VirtMachineState *s = container_of(n, VirtMachineState, generic_error_notifier);
acpi_send_event(s->acpi_dev, ACPI_GENERIC_ERROR);
diff --git a/MAINTAINERS b/MAINTAINERS
index 406cef88f0..b31ae5193d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2173,6 +2173,13 @@ F: hw/acpi/ghes.c
F: include/hw/acpi/ghes.h
F: docs/specs/acpi_hest_ghes.rst
+ACPI/HEST/GHES/ARM processor CPER
+R: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+S: Maintained
+F: hw/arm/ghes_cper.c
+F: hw/acpi/ghes_cper_stub.c
+F: qapi/acpi-hest.json
+
ppc4xx
L: qemu-ppc@nongnu.org
S: Orphan
diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig
index 1d4e9f0845..daabbe6cd1 100644
--- a/hw/acpi/Kconfig
+++ b/hw/acpi/Kconfig
@@ -51,6 +51,11 @@ config ACPI_APEI
bool
depends on ACPI
+config GHES_CPER
+ bool
+ depends on ACPI_APEI
+ default y
+
config ACPI_PCI
bool
depends on ACPI && PCI
diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build
index 73f02b9691..56b5d1ec96 100644
--- a/hw/acpi/meson.build
+++ b/hw/acpi/meson.build
@@ -34,4 +34,6 @@ endif
system_ss.add(when: 'CONFIG_ACPI', if_false: files('acpi-stub.c', 'aml-build-stub.c', 'ghes-stub.c', 'acpi_interface.c'))
system_ss.add(when: 'CONFIG_ACPI_PCI_BRIDGE', if_false: files('pci-bridge-stub.c'))
system_ss.add_all(when: 'CONFIG_ACPI', if_true: acpi_ss)
+system_ss.add(when: 'CONFIG_GHES_CPER', if_true: files('ghes_cper.c'))
+system_ss.add(when: 'CONFIG_GHES_CPER', if_false: files('ghes_cper_stub.c'))
system_ss.add(files('acpi-qmp-cmds.c'))
diff --git a/qapi/meson.build b/qapi/meson.build
index ca6b61a608..a46269b5a0 100644
--- a/qapi/meson.build
+++ b/qapi/meson.build
@@ -59,6 +59,7 @@ if have_system
qapi_all_modules += [
'accelerator',
'acpi',
+ 'acpi-hest',
'audio',
'cryptodev',
'qdev',
--
MST
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PULL 30/75] docs: hest: add new "etc/acpi_table_hest_addr" and update workflow
[not found] <cover.1759691708.git.mst@redhat.com>
` (9 preceding siblings ...)
2025-10-05 19:17 ` [PULL 27/75] qapi/acpi-hest: add an interface to do generic CPER error injection Michael S. Tsirkin
@ 2025-10-05 19:17 ` Michael S. Tsirkin
2025-10-05 19:17 ` [PULL 36/75] smbios: cap DIMM size to 2Tb as workaround for broken Windows Michael S. Tsirkin
11 siblings, 0 replies; 12+ messages in thread
From: Michael S. Tsirkin @ 2025-10-05 19:17 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Maydell, Mauro Carvalho Chehab, Jonathan Cameron,
Igor Mammedov, Dongjiu Geng, qemu-arm
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
While the HEST layout didn't change, there are some internal
changes related to how offsets are calculated and how memory error
events are triggered.
Update specs to reflect such changes.
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-ID: <e3e8bd92ce40d997c67ac1d4d973c0041b8f59fc.1758610789.git.mchehab+huawei@kernel.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
docs/specs/acpi_hest_ghes.rst | 28 +++++++++++++++++-----------
1 file changed, 17 insertions(+), 11 deletions(-)
diff --git a/docs/specs/acpi_hest_ghes.rst b/docs/specs/acpi_hest_ghes.rst
index c3e9f8d9a7..aaf7b1ad11 100644
--- a/docs/specs/acpi_hest_ghes.rst
+++ b/docs/specs/acpi_hest_ghes.rst
@@ -89,12 +89,21 @@ Design Details
addresses in the "error_block_address" fields with a pointer to the
respective "Error Status Data Block" in the "etc/hardware_errors" blob.
-(8) QEMU defines a third and write-only fw_cfg blob which is called
- "etc/hardware_errors_addr". Through that blob, the firmware can send back
- the guest-side allocation addresses to QEMU. The "etc/hardware_errors_addr"
- blob contains a 8-byte entry. QEMU generates a single WRITE_POINTER command
- for the firmware. The firmware will write back the start address of
- "etc/hardware_errors" blob to the fw_cfg file "etc/hardware_errors_addr".
+(8) QEMU defines a third and write-only fw_cfg blob to store the location
+ where the error block offsets, read ack registers and CPER records are
+ stored.
+
+ Up to QEMU 9.2, the location was at "etc/hardware_errors_addr", and
+ contains a GPA for the beginning of "etc/hardware_errors".
+
+ Newer versions place the location at "etc/acpi_table_hest_addr",
+ pointing to the GPA of the HEST table.
+
+ Using above mentioned 'fw_cfg' files, the firmware can send back the
+ guest-side allocation addresses to QEMU. They contain a 8-byte entry.
+ QEMU generates a single WRITE_POINTER command for the firmware. The
+ firmware will write back the start address of either "etc/hardware_errors"
+ or HEST table at the corresponding fw_cfg file.
(9) When QEMU gets a SIGBUS from the kernel, QEMU writes CPER into corresponding
"Error Status Data Block", guest memory, and then injects platform specific
@@ -105,8 +114,5 @@ Design Details
kernel, on receiving notification, guest APEI driver could read the CPER error
and take appropriate action.
-(11) kvm_arch_on_sigbus_vcpu() uses source_id as index in "etc/hardware_errors" to
- find out "Error Status Data Block" entry corresponding to error source. So supported
- source_id values should be assigned here and not be changed afterwards to make sure
- that guest will write error into expected "Error Status Data Block" even if guest was
- migrated to a newer QEMU.
+(11) kvm_arch_on_sigbus_vcpu() reports RAS errors via a SEA notifications,
+ when a SIGBUS event is triggered.
--
MST
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PULL 36/75] smbios: cap DIMM size to 2Tb as workaround for broken Windows
[not found] <cover.1759691708.git.mst@redhat.com>
` (10 preceding siblings ...)
2025-10-05 19:17 ` [PULL 30/75] docs: hest: add new "etc/acpi_table_hest_addr" and update workflow Michael S. Tsirkin
@ 2025-10-05 19:17 ` Michael S. Tsirkin
11 siblings, 0 replies; 12+ messages in thread
From: Michael S. Tsirkin @ 2025-10-05 19:17 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Maydell, Igor Mammedov, Eduardo Habkost, Marcel Apfelbaum,
Philippe Mathieu-Daudé, Yanan Wang, Zhao Liu, Paolo Bonzini,
Richard Henderson, qemu-arm
From: Igor Mammedov <imammedo@redhat.com>
With current limit set to match max spec size (2PTb),
Windows fails to parse type 17 records when DIMM size reaches 4Tb+.
Failure happens in GetPhysicallyInstalledSystemMemory() function,
and fails "Check SMBIOS System Memory Tables" SVVP test.
Though not fatal, it might cause issues for userspace apps,
something like [1].
Lets cap default DIMM size to 2Tb for now, until MS fixes it.
1) https://issues.redhat.com/browse/RHEL-81999?focusedId=27731200&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-27731200
PS: It's obvious 32 int overflow math somewhere in Windows,
MS admitted that it's Windows bug and in a process of fixing it.
However it's unclear if W10 and earlier would get the fix.
So however I dislike changing defaults, we heed to work around
the issue (it looks like QEMU regression while not being it).
Hopefully 2Tb/DIMM split will last longer until VM memory size
will become large enough to cause to many type 17 records issue
again.
PPS:
Alternatively, instead of messing with defaults, we can create
a dedicated knob to ask for desired DIMM size cap explicitly
on CLI. That will let users to enable workaround when they
hit this corner case. Downside is that knob has to be propagated
up all mgmt stack, which might be not desirable.
PPPS:
Yet alternatively, users can configure initial RAM to be less
than 4Tb and all additional RAM add as DIMMs on QEMU CLI.
(however it's the job to be done by mgmt which could know
Windows version and total amount of RAM)
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Fixes: 62f182c97b ("smbios: make memory device size configurable per Machine")
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-ID: <20250901084915.2607632-1-imammedo@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/arm/virt.c | 1 +
hw/core/machine.c | 5 ++++-
hw/i386/pc_piix.c | 1 +
hw/i386/pc_q35.c | 1 +
4 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index aad557be1a..175023897a 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3537,6 +3537,7 @@ DEFINE_VIRT_MACHINE_AS_LATEST(10, 2)
static void virt_machine_10_1_options(MachineClass *mc)
{
virt_machine_10_2_options(mc);
+ mc->smbios_memory_device_size = 2047 * TiB;
compat_props_add(mc->compat_props, hw_compat_10_1, hw_compat_10_1_len);
}
DEFINE_VIRT_MACHINE(10, 1)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 7b7a381b0a..681adbb7ac 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -1118,8 +1118,11 @@ static void machine_class_init(ObjectClass *oc, const void *data)
* SMBIOS 3.1.0 7.18.5 Memory Device — Extended Size
* use max possible value that could be encoded into
* 'Extended Size' field (2047Tb).
+ *
+ * Unfortunately (current) Windows Server 2025 and earlier do not handle
+ * 4Tb+ DIMM size.
*/
- mc->smbios_memory_device_size = 2047 * TiB;
+ mc->smbios_memory_device_size = 2 * TiB;
/* numa node memory size aligned on 8MB by default.
* On Linux, each node's border has to be 8MB aligned
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index caf8bab68e..7b3611e973 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -448,6 +448,7 @@ DEFINE_I440FX_MACHINE_AS_LATEST(10, 2);
static void pc_i440fx_machine_10_1_options(MachineClass *m)
{
pc_i440fx_machine_10_2_options(m);
+ m->smbios_memory_device_size = 2047 * TiB;
compat_props_add(m->compat_props, hw_compat_10_1, hw_compat_10_1_len);
compat_props_add(m->compat_props, pc_compat_10_1, pc_compat_10_1_len);
}
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index e89951285e..6015e639d7 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -384,6 +384,7 @@ DEFINE_Q35_MACHINE_AS_LATEST(10, 2);
static void pc_q35_machine_10_1_options(MachineClass *m)
{
pc_q35_machine_10_2_options(m);
+ m->smbios_memory_device_size = 2047 * TiB;
compat_props_add(m->compat_props, hw_compat_10_1, hw_compat_10_1_len);
compat_props_add(m->compat_props, pc_compat_10_1, pc_compat_10_1_len);
}
--
MST
^ permalink raw reply related [flat|nested] 12+ messages in thread