* [PATCH 1/8] ras: aest: Fix shared processor node handling and error log messages
2026-05-05 12:23 [PATCH 0/8] ras: aest: extend AEST support to Device Tree frontend Umang Chheda
@ 2026-05-05 12:23 ` Umang Chheda
2026-05-05 12:23 ` [PATCH 2/8] ras: aest: Fix CE/UE error counts not incrementing in debugfs Umang Chheda
` (6 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Umang Chheda @ 2026-05-05 12:23 UTC (permalink / raw)
To: Ruidong Tian, Tony Luck, Borislav Petkov, Rob Herring,
Krzysztof Kozlowski, Conor Dooley, Bjorn Andersson, Konrad Dybcio,
catalin.marinas, will, lpieralisi, rafael, mark.rutland,
Sudeep Holla
Cc: linux-arm-msm, linux-acpi, linux-arm-kernel, linux-edac,
linux-kernel, devicetree, linux-edac, Umang Chheda
Two related fixes for processor nodes with ACPI_AEST_PROC_FLAG_SHARED
or ACPI_AEST_PROC_FLAG_GLOBAL set (e.g. cluster L3 cache, DSU):
1. aest_dev_is_oncore() returns true for any PROCESSOR_ERROR_NODE,
causing shared processor nodes (which use an SPI) to take the
cpuhp/PPI path. cpuhp_setup_state() is called instead of
aest_online_dev(), so aest_config_irq() is never called and the
hardware IRQ-config register is never programmed.
Fix aest_dev_is_oncore() to check irq_is_percpu() on the registered
IRQ. Only nodes whose FHI or ERI is a per-CPU PPI take the oncore
path; nodes with an SPI take aest_online_dev().
2. alloc_aest_node_name() uses processor_id for the node name of all
processor nodes. Shared/global nodes have processor_id=0 (the
field is unused when SHARED/GLOBAL is set), so every shared node
and the per-PE node for CPU 0 both got the name "processor.0",
making error logs ambiguous.
For shared/global nodes, build the name as
"processor.<resource_type>.<device_id>" (e.g. "processor.cache.1")
so each node has a unique, meaningful identifier. Per-PE nodes
keep the original "processor.<mpidr>" form.
Also add proc_flags to struct aest_event so aest_print() can
distinguish shared from per-PE nodes and print an appropriate
message.
Signed-off-by: Umang Chheda <umang.chheda@oss.qualcomm.com>
---
drivers/ras/aest/aest-core.c | 54 ++++++++++++++++++++++++++++++++++++++++----
drivers/ras/aest/aest.h | 15 +++++++++++-
2 files changed, 64 insertions(+), 5 deletions(-)
diff --git a/drivers/ras/aest/aest-core.c b/drivers/ras/aest/aest-core.c
index 6a2d84b47721..b4f4c975da1d 100644
--- a/drivers/ras/aest/aest-core.c
+++ b/drivers/ras/aest/aest-core.c
@@ -49,7 +49,19 @@ static void aest_print(struct aest_event *event)
switch (event->type) {
case ACPI_AEST_PROCESSOR_ERROR_NODE:
- pr_err("%s Error from CPU%d\n", pfx_seq, event->id0);
+ /*
+ * For shared/global nodes (e.g. cluster L3 cache, DSU),
+ * id0 is the CPU that handled the interrupt — not the error
+ * source itself. The node_name already identifies the resource
+ * (e.g. "processor.cache.1"). Print a distinct message so the
+ * log is not confused with a per-PE CPU error.
+ */
+ if (event->proc_flags &
+ (ACPI_AEST_PROC_FLAG_SHARED | ACPI_AEST_PROC_FLAG_GLOBAL))
+ pr_err("%s Error from shared processor resource (interrupt handled on CPU%d)\n",
+ pfx_seq, event->id0);
+ else
+ pr_err("%s Error from CPU%d\n", pfx_seq, event->id0);
break;
case ACPI_AEST_MEMORY_ERROR_NODE:
pr_err("%s Error from memory at SRAT proximity domain %#x\n",
@@ -133,6 +145,7 @@ static void init_aest_event(struct aest_event *event,
info->processor->processor_id);
event->id1 = info->processor->resource_type;
+ event->proc_flags = info->processor->flags;
break;
case ACPI_AEST_MEMORY_ERROR_NODE:
event->id0 = info->memory->srat_proximity_domain;
@@ -175,6 +188,7 @@ static int aest_node_gen_pool_add(struct aest_device *adev,
if (!event)
return -ENOMEM;
+ memset(event, 0, sizeof(*event));
init_aest_event(event, record, regs);
llist_add(&event->llnode, &adev->event_list);
@@ -730,9 +744,41 @@ static char *alloc_aest_node_name(struct aest_node *node)
switch (node->type) {
case ACPI_AEST_PROCESSOR_ERROR_NODE:
- name = devm_kasprintf(node->adev->dev, GFP_KERNEL, "%s.%d",
- aest_node_name[node->type],
- node->info->processor->processor_id);
+ /*
+ * Shared/global processor nodes (e.g. cluster L3 cache, DSU)
+ * have processor_id=0 and use smp_processor_id() at error-log
+ * time — using processor_id in the name would produce the same
+ * "processor.0" string for every shared node and every CPU0
+ * per-PE node, making logs ambiguous.
+ *
+ * For shared/global nodes, build the name from the resource
+ * type and the device id so each node gets a unique, meaningful
+ * name (e.g. "processor.cache.1", "processor.tlb.2").
+ *
+ * For per-PE nodes, keep the original "processor.<mpidr>" form.
+ */
+ if (node->info->processor->flags &
+ (ACPI_AEST_PROC_FLAG_SHARED | ACPI_AEST_PROC_FLAG_GLOBAL)) {
+ static const char *const res_name[] = {
+ [ACPI_AEST_CACHE_RESOURCE] = "cache",
+ [ACPI_AEST_TLB_RESOURCE] = "tlb",
+ [ACPI_AEST_GENERIC_RESOURCE] = "generic",
+ };
+ u8 rtype = node->info->processor->resource_type;
+ const char *rstr = (rtype < ARRAY_SIZE(res_name) &&
+ res_name[rtype]) ? res_name[rtype] : "unknown";
+
+ name = devm_kasprintf(node->adev->dev, GFP_KERNEL,
+ "%s.%s.%d",
+ aest_node_name[node->type],
+ rstr,
+ node->adev->id);
+ } else {
+ name = devm_kasprintf(node->adev->dev, GFP_KERNEL,
+ "%s.%d",
+ aest_node_name[node->type],
+ node->info->processor->processor_id);
+ }
break;
case ACPI_AEST_MEMORY_ERROR_NODE:
case ACPI_AEST_SMMU_ERROR_NODE:
diff --git a/drivers/ras/aest/aest.h b/drivers/ras/aest/aest.h
index 9d67d79eb4a2..9704af97fee8 100644
--- a/drivers/ras/aest/aest.h
+++ b/drivers/ras/aest/aest.h
@@ -8,6 +8,7 @@
#include <linux/acpi_aest.h>
#include <asm/ras.h>
#include <linux/debugfs.h>
+#include <linux/irqdesc.h>
#define MAX_GSI_PER_NODE 2
#define DEFAULT_CE_THRESHOLD 1
@@ -94,6 +95,8 @@ struct aest_event {
/* Vendor node : hardware ID. */
char *hid;
u32 index;
+ /* Processor node: ACPI_AEST_PROC_FLAG_* bitmask (SHARED/GLOBAL) */
+ u8 proc_flags;
u64 ce_threshold;
int addressing_mode;
struct ras_ext_regs regs;
@@ -387,7 +390,17 @@ static inline void aest_sync(struct aest_node *node)
static inline bool aest_dev_is_oncore(struct aest_device *adev)
{
- return adev->type == ACPI_AEST_PROCESSOR_ERROR_NODE;
+ /*
+ * A processor node is "on-core" (uses PPI + cpuhp) only when its
+ * interrupt is a per-CPU PPI. A shared processor node (e.g. cluster
+ * L3 cache, DSU) uses an SPI and must follow the non-oncore path
+ * (aest_online_dev) so that aest_config_irq and aest_online_dev are
+ * called instead of cpuhp_setup_state.
+ */
+ if (adev->type != ACPI_AEST_PROCESSOR_ERROR_NODE)
+ return false;
+ return irq_is_percpu(adev->irq[ACPI_AEST_NODE_FAULT_HANDLING]) ||
+ irq_is_percpu(adev->irq[ACPI_AEST_NODE_ERROR_RECOVERY]);
}
static inline int default_errgsr_mapping(int errgsr_bit)
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 2/8] ras: aest: Fix CE/UE error counts not incrementing in debugfs
2026-05-05 12:23 [PATCH 0/8] ras: aest: extend AEST support to Device Tree frontend Umang Chheda
2026-05-05 12:23 ` [PATCH 1/8] ras: aest: Fix shared processor node handling and error log messages Umang Chheda
@ 2026-05-05 12:23 ` Umang Chheda
2026-05-05 12:23 ` [PATCH 3/8] ras: aest: Skip unimplemented records " Umang Chheda
` (5 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Umang Chheda @ 2026-05-05 12:23 UTC (permalink / raw)
To: Ruidong Tian, Tony Luck, Borislav Petkov, Rob Herring,
Krzysztof Kozlowski, Conor Dooley, Bjorn Andersson, Konrad Dybcio,
catalin.marinas, will, lpieralisi, rafael, mark.rutland,
Sudeep Holla
Cc: linux-arm-msm, linux-acpi, linux-arm-kernel, linux-edac,
linux-kernel, devicetree, linux-edac, Umang Chheda
The error counts visible under:
/sys/kernel/debug/aest/<dev>/processor<cpu>/<node>/err_count
always reported zero, even though corrected errors (CEs) were being
serviced by the interrupt handler. aest_oncore_dev_init_debugfs() sets
up per CPU debugfs entries but wired them up incorrectly in two places:
- this_cpu_ptr(adev->adev_oncore) was used inside for_each_possible_cpu().
This always selects the slot for the CPU executing the init code, so all
debugfs files ended up referencing the same per CPU aest_device instance
instead of the CPU indicated by the loop variable.
- The code referenced adev->nodes[i], i.e. the template nodes allocated
before __setup_ppi, rather than the per-CPU copies at
percpu_dev->nodes[i]. The IRQ handler updates CE counters in the per-CPU
records created by __setup_ppi; the template records are never touched
at runtime, so err_count always read as zero.
Fix this by:
- Using per_cpu_ptr(adev->adev_oncore, cpu) when iterating over CPUs.
- Wiring debugfs files to percpu_dev->nodes[i] so counters reflect the
data updated by the IRQ handler.
- Using adev->nodes[i].name for debugfs directory names. The per-CPU node
receives name via a shallow memcpy and is not the authoritative source.
Signed-off-by: Umang Chheda <umang.chheda@oss.qualcomm.com>
---
drivers/ras/aest/aest-sysfs.c | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/drivers/ras/aest/aest-sysfs.c b/drivers/ras/aest/aest-sysfs.c
index 66e9c1103f99..f710503e4d74 100644
--- a/drivers/ras/aest/aest-sysfs.c
+++ b/drivers/ras/aest/aest-sysfs.c
@@ -189,16 +189,23 @@ aest_oncore_dev_init_debugfs(struct aest_device *adev)
char name[16];
for_each_possible_cpu(cpu) {
- percpu_dev = this_cpu_ptr(adev->adev_oncore);
+ percpu_dev = per_cpu_ptr(adev->adev_oncore, cpu);
- snprintf(name, sizeof(name), "processor%u%u", cpu);
+ snprintf(name, sizeof(name), "processor%u", cpu);
percpu_dev->debugfs = debugfs_create_dir(name, adev->debugfs);
for (i = 0; i < adev->node_cnt; i++) {
- node = &adev->nodes[i];
-
- node->debugfs = debugfs_create_dir(node->name,
- percpu_dev->debugfs);
+ node = &percpu_dev->nodes[i];
+
+ /*
+ * Use adev->nodes[i].name (the original) rather than
+ * node->name from the per-CPU copy. The per-CPU copy
+ * receives node->name via shallow memcpy in __setup_ppi;
+ * the original is the authoritative, guaranteed-valid
+ * string.
+ */
+ node->debugfs = debugfs_create_dir(adev->nodes[i].name,
+ percpu_dev->debugfs);
aest_node_init_debugfs(node);
}
}
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 3/8] ras: aest: Skip unimplemented records in debugfs
2026-05-05 12:23 [PATCH 0/8] ras: aest: extend AEST support to Device Tree frontend Umang Chheda
2026-05-05 12:23 ` [PATCH 1/8] ras: aest: Fix shared processor node handling and error log messages Umang Chheda
2026-05-05 12:23 ` [PATCH 2/8] ras: aest: Fix CE/UE error counts not incrementing in debugfs Umang Chheda
@ 2026-05-05 12:23 ` Umang Chheda
2026-05-05 12:23 ` [PATCH 4/8] ras: aest: Add panic_on_ue module parameter Umang Chheda
` (4 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Umang Chheda @ 2026-05-05 12:23 UTC (permalink / raw)
To: Ruidong Tian, Tony Luck, Borislav Petkov, Rob Herring,
Krzysztof Kozlowski, Conor Dooley, Bjorn Andersson, Konrad Dybcio,
catalin.marinas, will, lpieralisi, rafael, mark.rutland,
Sudeep Holla
Cc: linux-arm-msm, linux-acpi, linux-arm-kernel, linux-edac,
linux-kernel, devicetree, linux-edac, Umang Chheda
The record_implemented bitmap uses the same semantics as the rest of
the driver: a SET bit means the record is NOT implemented (skip it),
a CLEAR bit means the record IS implemented (process it).
aest_node_init_debugfs() and aest_node_err_count_show() were iterating
all record_count records unconditionally, creating debugfs entries and
accumulating error counts for unimplemented records too.
Fix both functions to skip records where the corresponding bit is set
in node->record_implemented, consistent with how aest_node_foreach_record()
handles the same bitmap.
Signed-off-by: Umang Chheda <umang.chheda@oss.qualcomm.com>
---
drivers/ras/aest/aest-sysfs.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/ras/aest/aest-sysfs.c b/drivers/ras/aest/aest-sysfs.c
index f710503e4d74..b36190bb3b3e 100644
--- a/drivers/ras/aest/aest-sysfs.c
+++ b/drivers/ras/aest/aest-sysfs.c
@@ -52,7 +52,8 @@ static int aest_node_err_count_show(struct seq_file *m, void *data)
int i;
for (i = 0; i < node->record_count; i++)
- aest_error_count(&node->records[i], &count);
+ if (!test_bit(i, node->record_implemented))
+ aest_error_count(&node->records[i], &count);
seq_printf(m, "CE: %llu\n"
"DE: %llu\n"
@@ -174,8 +175,11 @@ aest_node_init_debugfs(struct aest_node *node)
record = &node->records[i];
if (!record->name)
continue;
+ /* Skip records not implemented on this node. */
+ if (test_bit(i, node->record_implemented))
+ continue;
record->debugfs = debugfs_create_dir(record->name,
- node->debugfs);
+ node->debugfs);
aest_record_init_debugfs(record);
}
}
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 4/8] ras: aest: Add panic_on_ue module parameter
2026-05-05 12:23 [PATCH 0/8] ras: aest: extend AEST support to Device Tree frontend Umang Chheda
` (2 preceding siblings ...)
2026-05-05 12:23 ` [PATCH 3/8] ras: aest: Skip unimplemented records " Umang Chheda
@ 2026-05-05 12:23 ` Umang Chheda
2026-05-05 12:23 ` [PATCH 5/8] dt-bindings: arm: ras: Introduce bindings for ARM AEST Umang Chheda
` (3 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Umang Chheda @ 2026-05-05 12:23 UTC (permalink / raw)
To: Ruidong Tian, Tony Luck, Borislav Petkov, Rob Herring,
Krzysztof Kozlowski, Conor Dooley, Bjorn Andersson, Konrad Dybcio,
catalin.marinas, will, lpieralisi, rafael, mark.rutland,
Sudeep Holla
Cc: linux-arm-msm, linux-acpi, linux-arm-kernel, linux-edac,
linux-kernel, devicetree, linux-edac, Umang Chheda
The driver unconditionally calls panic() whenever an unrecoverable,
uncontainable UE (UET_UC or UET_UEU) is detected. There is no way
for the user to suppress this behaviour, which makes it difficult to
test UE injection or to run in environments where a kernel panic on
every UE is undesirable.
Add a module parameter `aest_panic_on_ue`. When set to 0, the driver
logs the UE and continues instead of panicking.
Usage:
# Boot time (kernel cmdline)
aest.aest_panic_on_ue=0
# Runtime
echo 0 > /sys/module/aest/parameters/aest_panic_on_ue
Signed-off-by: Umang Chheda <umang.chheda@oss.qualcomm.com>
---
drivers/ras/aest/aest-core.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/ras/aest/aest-core.c b/drivers/ras/aest/aest-core.c
index b4f4c975da1d..9ce782a66edf 100644
--- a/drivers/ras/aest/aest-core.c
+++ b/drivers/ras/aest/aest-core.c
@@ -22,6 +22,11 @@ DEFINE_PER_CPU(struct aest_device, percpu_adev);
#undef pr_fmt
#define pr_fmt(fmt) "AEST: " fmt
+static bool aest_panic_on_ue = true;	/* on by default: keep the existing panic */
+module_param(aest_panic_on_ue, bool, 0644);
+MODULE_PARM_DESC(aest_panic_on_ue,
+		 "Panic on unrecoverable error: 0=off 1=on (default: 1)");
+
#ifdef CONFIG_DEBUG_FS
struct dentry *aest_debugfs;
#endif
@@ -342,9 +347,11 @@ void aest_proc_record(struct aest_record *record, void *data, bool fake)
aest_record_info(
record,
"Simulated error! Skip panic due to fault injection\n");
- else
+ else if (aest_panic_on_ue)
 			aest_panic(record, &regs,
"AEST: unrecoverable error encountered");
+ else
+ aest_record_err(record, "UE detected, panic suppressed\n");
}
 	aest_log(record, &regs);
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 5/8] dt-bindings: arm: ras: Introduce bindings for ARM AEST
2026-05-05 12:23 [PATCH 0/8] ras: aest: extend AEST support to Device Tree frontend Umang Chheda
` (3 preceding siblings ...)
2026-05-05 12:23 ` [PATCH 4/8] ras: aest: Add panic_on_ue module parameter Umang Chheda
@ 2026-05-05 12:23 ` Umang Chheda
2026-05-05 12:23 ` [PATCH 6/8] ras: aest: Add DT frontend for ARM AEST RAS error sources Umang Chheda
` (2 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Umang Chheda @ 2026-05-05 12:23 UTC (permalink / raw)
To: Ruidong Tian, Tony Luck, Borislav Petkov, Rob Herring,
Krzysztof Kozlowski, Conor Dooley, Bjorn Andersson, Konrad Dybcio,
catalin.marinas, will, lpieralisi, rafael, mark.rutland,
Sudeep Holla
Cc: linux-arm-msm, linux-acpi, linux-arm-kernel, linux-edac,
linux-kernel, devicetree, linux-edac, Umang Chheda
The Arm Error Source Table (AEST) specification describes how firmware
exposes RAS error source topology to the operating system. On ACPI
systems this information is provided via the AEST ACPI table.
Introduce Device Tree bindings that provide an equivalent description
of AEST error sources for DT-based platforms.
Signed-off-by: Umang Chheda <umang.chheda@oss.qualcomm.com>
---
.../devicetree/bindings/arm/arm,aest.yaml | 406 +++++++++++++++++++++
include/dt-bindings/arm/aest.h | 43 +++
2 files changed, 449 insertions(+)
diff --git a/Documentation/devicetree/bindings/arm/arm,aest.yaml b/Documentation/devicetree/bindings/arm/arm,aest.yaml
new file mode 100644
index 000000000000..7809a0d38270
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,aest.yaml
@@ -0,0 +1,406 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,aest.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm Error Source Table (AEST)
+
+maintainers:
+ - Umang Chheda <umang.chheda@oss.qualcomm.com>
+
+description:
+ The Arm Error Source Table (AEST) describes RAS error sources and their
+ register interfaces. Each error source exposes one or more error records
+ through either system registers or a memory-mapped register window, and
+ may signal errors via interrupts. The top-level node acts as a container
+ for one or more child nodes, each describing a single AEST error source.
+ Refer to the Arm AEST specification (DEN0085 / DDI 0587B) for details.
+ Flag bit constants for use in DT source files are defined in
+ <dt-bindings/arm/aest.h>.
+
+properties:
+ compatible:
+ const: arm,aest
+
+ "#address-cells":
+ const: 2
+
+ "#size-cells":
+ const: 2
+
+ ranges: true
+
+required:
+ - compatible
+
+additionalProperties: false
+
+patternProperties:
+ "^aest-[a-z0-9-]+(@[0-9a-f]+)?$":
+ type: object
+ description:
+ An AEST error source node describing one error source defined by
+ the Arm AEST specification.
+
+ properties:
+ compatible:
+ description:
+ Identifies the type of AEST error source. Each value corresponds to
+ a distinct error source class defined by the Arm AEST specification.
+ arm,aest-proxy represents a proxy error source that forwards errors
+ from another error source.
+ enum:
+ - arm,aest-processor
+ - arm,aest-memory
+ - arm,aest-smmu
+ - arm,aest-gic
+ - arm,aest-pcie
+ - arm,aest-vendor
+ - arm,aest-proxy
+
+ reg:
+ description:
+ Register ranges for the error source. Absence of reg implies
+ system-register access (interface type 0). A single range implies
+ memory-mapped access (interface type 1). Two ranges imply
+ single-record memory-mapped access (interface type 2).
+ minItems: 1
+ maxItems: 4
+
+ reg-names:
+ description:
+ Names for the register ranges. The base error-record window is
+ unnamed (or first entry). Optional named ranges provide access to
+ the fault-injection, error-group, and interrupt-config register
+ windows defined by the AEST specification.
+ minItems: 1
+ maxItems: 4
+ items:
+ enum:
+ - fault-inject
+ - err-group
+ - irq-config
+
+ interrupts:
+ description: Interrupts associated with the error source.
+ minItems: 1
+ maxItems: 2
+
+ interrupt-names:
+ description: Names of the interrupts associated with the error source.
+ minItems: 1
+ maxItems: 2
+ items:
+ enum:
+ - fhi
+ - eri
+
+ arm,fhi-flags:
+ description:
+ Bitmask of flags for the fault-handling interrupt (FHI), as defined
+ in the AEST node interrupt structure flags field. Constants are
+ defined in <dt-bindings/arm/aest.h> - AEST_IRQ_MODE_LEVEL (0),
+ AEST_IRQ_MODE_EDGE (1).
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ arm,eri-flags:
+ description:
+ Bitmask of flags for the error-recovery interrupt (ERI), as defined
+ in the AEST node interrupt structure flags field. Constants are
+ defined in <dt-bindings/arm/aest.h>.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ arm,interface-flags:
+ description: |
+ Bitmask of interface flags for the error source, as defined in the
+ AEST node interface flags field. Constants are defined in
+ <dt-bindings/arm/aest.h>:
+ AEST_XFACE_SHARED (bit 0) - shared error source,
+ AEST_XFACE_CLEAR_MISC (bit 1) - clear MISC registers on error,
+ AEST_XFACE_ERROR_DEVICE (bit 2) - error node device present,
+ AEST_XFACE_AFFINITY (bit 3) - affinity information valid,
+ AEST_XFACE_ERROR_GROUP (bit 4) - error group register window present,
+ AEST_XFACE_FAULT_INJECT (bit 5) - fault injection register window present,
+ AEST_XFACE_INT_CONFIG (bit 6) - interrupt config register window present.
+ For system-register interface nodes (no reg property), only
+ AEST_XFACE_CLEAR_MISC is meaningful; the MMIO window flags
+ (AEST_XFACE_ERROR_GROUP, AEST_XFACE_FAULT_INJECT,
+ AEST_XFACE_INT_CONFIG) have no effect without a base address.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ arm,group-format:
+ description: |
+ Page-granularity of the error record group register window, which
+ determines the MMIO mapping size, the number of ERRGSR registers,
+ and the width of the record-implemented and status-reporting bitmaps.
+ Constants are defined in <dt-bindings/arm/aest.h>:
+ AEST_GROUP_FORMAT_4K (0) - 4K window, 1 ERRGSR, up to 64 records,
+ AEST_GROUP_FORMAT_16K (1) - 16K window, 4 ERRGSRs, up to 256 records,
+ AEST_GROUP_FORMAT_64K (2) - 64K window, 14 ERRGSRs, up to 896 records.
+ Required for memory-mapped nodes (reg present) where it controls
+ the ioremap size and ERRGSR layout. For system-register nodes
+ (no reg property) this property is optional and defaults to
+ AEST_GROUP_FORMAT_4K.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2]
+
+ arm,num-records:
+ description: Number of error records implemented by this error source.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ arm,record-impl:
+ description:
+ Bitmap of implemented error records within this error source. Bit N
+ set to 0 means error record N is implemented and must be polled.
+ $ref: /schemas/types.yaml#/definitions/uint64-array
+
+ arm,status-reporting:
+ description:
+ Bitmap indicating which error records support status reporting via
+ the ERRGSR register. Bit N set to 1 means record N does not report
+ through ERRGSR and must be polled explicitly.
+ $ref: /schemas/types.yaml#/definitions/uint64-array
+
+ arm,addressing-mode:
+ description:
+ Bitmap indicating the address type reported in ERR_ADDR for each
+ error record. Bit N set to 0 means record N reports System Physical
+ Addresses (SPA); bit N set to 1 means record N reports node-specific
+ Logical Addresses (LA) that require OS translation to SPA.
+ $ref: /schemas/types.yaml#/definitions/uint64-array
+
+ arm,processor-flags:
+ description:
+ Bitmask indicating the scope of a processor error source, as defined
+ in the AEST processor node flags field. Constants are defined in
+ <dt-bindings/arm/aest.h> - AEST_PROC_GLOBAL (bit 0),
+ AEST_PROC_SHARED (bit 1).
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ arm,resource-type:
+ description: |
+ Type of processor resource associated with this error source.
+ Constants are defined in <dt-bindings/arm/aest.h>:
+ AEST_RESOURCE_CACHE (0),
+ AEST_RESOURCE_TLB (1),
+ AEST_RESOURCE_GENERIC (2).
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2]
+
+ arm,cache-ref:
+ description:
+ Phandle to the cache node associated with this processor error source.
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+ arm,tlb-level:
+ description: TLB level identifier for this processor TLB error source.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ arm,resource-ref:
+ description:
+ Generic resource reference identifier for this processor error source.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ arm,proximity-domain:
+ description:
+ SRAT proximity domain of the memory node associated with this error
+ source.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ arm,smmu-ref:
+ description:
+ Phandle to the SMMU node in the IORT associated with this error
+ source.
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+ arm,smmu-subcomponent:
+ description:
+ SMMU subcomponent reference identifier for this error source, as
+ defined in the AEST SMMU node structure.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ arm,gic-type:
+ description: |
+ GIC component type for this error source, as defined in the AEST GIC
+ node structure. Constants are defined in <dt-bindings/arm/aest.h>:
+ AEST_GIC_CPU (0),
+ AEST_GIC_DISTRIBUTOR (1),
+ AEST_GIC_REDISTRIBUTOR (2),
+ AEST_GIC_ITS (3).
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+
+ arm,gic-instance:
+ description:
+ GIC instance identifier for this error source, used to distinguish
+ multiple instances of the same GIC component type.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ arm,pcie-segment:
+ description:
+ PCI segment number of the PCIe root port associated with this error
+ source, corresponding to the IORT node reference.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ arm,vendor-hid:
+ description:
+ 8-character ACPI Hardware ID string identifying the vendor error
+ source, as defined in the AEST vendor node structure.
+ $ref: /schemas/types.yaml#/definitions/string
+
+ arm,vendor-uid:
+ description:
+ ACPI unique instance identifier for this vendor error source, used
+ to distinguish multiple instances with the same hardware ID.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ required:
+ - compatible
+ - arm,num-records
+
+ allOf:
+ - if:
+ required:
+ - reg
+ then:
+ required:
+ - arm,group-format
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: arm,aest-processor
+ then:
+ properties:
+ arm,processor-flags: {}
+ arm,resource-type: {}
+ arm,cache-ref: {}
+ arm,tlb-level: {}
+ arm,resource-ref: {}
+ else:
+ properties:
+ arm,processor-flags: false
+ arm,resource-type: false
+ arm,cache-ref: false
+ arm,tlb-level: false
+ arm,resource-ref: false
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: arm,aest-memory
+ then:
+ required:
+ - arm,proximity-domain
+ properties:
+ arm,proximity-domain: {}
+ else:
+ properties:
+ arm,proximity-domain: false
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: arm,aest-smmu
+ then:
+ required:
+ - arm,smmu-ref
+ properties:
+ arm,smmu-ref: {}
+ arm,smmu-subcomponent: {}
+ else:
+ properties:
+ arm,smmu-ref: false
+ arm,smmu-subcomponent: false
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: arm,aest-gic
+ then:
+ properties:
+ arm,gic-type: {}
+ arm,gic-instance: {}
+ else:
+ properties:
+ arm,gic-type: false
+ arm,gic-instance: false
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: arm,aest-pcie
+ then:
+ required:
+ - arm,pcie-segment
+ properties:
+ arm,pcie-segment: {}
+ else:
+ properties:
+ arm,pcie-segment: false
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: arm,aest-vendor
+ then:
+ required:
+ - arm,vendor-hid
+ properties:
+ arm,vendor-hid: {}
+ arm,vendor-uid: {}
+ else:
+ properties:
+ arm,vendor-hid: false
+ arm,vendor-uid: false
+
+ unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/arm/aest.h>
+
+ aest {
+ compatible = "arm,aest";
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ /* System-register based processor error source (no reg property) */
+ aest-processor-0 {
+ compatible = "arm,aest-processor";
+ arm,num-records = <2>;
+            arm,record-impl = /bits/ 64 <0x0>;
+ arm,status-reporting = /bits/ 64 <0x0>;
+ arm,addressing-mode = /bits/ 64 <0x0>;
+ arm,processor-flags = <AEST_PROC_GLOBAL>;
+ arm,resource-type = <AEST_RESOURCE_CACHE>;
+ interrupts = <GIC_PPI 0 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "fhi";
+ };
+
+ /* Memory-mapped memory controller error source */
+ aest-memory-0@50010000 {
+ compatible = "arm,aest-memory";
+ reg = <0x0 0x50010000 0x0 0x1000>,
+ <0x0 0x50011000 0x0 0x1000>,
+ <0x0 0x50012000 0x0 0x1000>;
+ reg-names = "err-group", "fault-inject", "irq-config";
+ arm,group-format = <AEST_GROUP_FORMAT_4K>;
+ arm,num-records = <4>;
+            arm,record-impl = /bits/ 64 <0x0>;
+ arm,status-reporting = /bits/ 64 <0x0>;
+ arm,addressing-mode = /bits/ 64 <0x0>;
+ arm,interface-flags = <AEST_XFACE_ERROR_GROUP>;
+ arm,proximity-domain = <0>;
+ interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "fhi", "eri";
+ };
+ };
diff --git a/include/dt-bindings/arm/aest.h b/include/dt-bindings/arm/aest.h
new file mode 100644
index 000000000000..43679314e98e
--- /dev/null
+++ b/include/dt-bindings/arm/aest.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * This header provides constants for the Arm Error Source Table (AEST)
+ * DT binding (Documentation/devicetree/bindings/arm/arm,aest.yaml).
+ */
+
+#ifndef _DT_BINDINGS_ARM_AEST_H
+#define _DT_BINDINGS_ARM_AEST_H
+
+/* arm,interface-flags - AEST node interface flags field */
+#define AEST_XFACE_SHARED 1
+#define AEST_XFACE_CLEAR_MISC 2
+#define AEST_XFACE_ERROR_DEVICE 4
+#define AEST_XFACE_AFFINITY 8
+#define AEST_XFACE_ERROR_GROUP 16
+#define AEST_XFACE_FAULT_INJECT 32
+#define AEST_XFACE_INT_CONFIG 64
+
+/* arm,fhi-flags / arm,eri-flags - AEST node interrupt flags field */
+#define AEST_IRQ_MODE_LEVEL 0
+#define AEST_IRQ_MODE_EDGE 1
+
+/* arm,processor-flags - AEST processor node flags field */
+#define AEST_PROC_GLOBAL 1
+#define AEST_PROC_SHARED 2
+
+/* arm,group-format - error record group register window page size */
+#define AEST_GROUP_FORMAT_4K 0
+#define AEST_GROUP_FORMAT_16K 1
+#define AEST_GROUP_FORMAT_64K 2
+
+/* arm,resource-type - processor resource type */
+#define AEST_RESOURCE_CACHE 0
+#define AEST_RESOURCE_TLB 1
+#define AEST_RESOURCE_GENERIC 2
+
+/* arm,gic-type - GIC component type */
+#define AEST_GIC_CPU 0
+#define AEST_GIC_DISTRIBUTOR 1
+#define AEST_GIC_REDISTRIBUTOR 2
+#define AEST_GIC_ITS 3
+
+#endif /* _DT_BINDINGS_ARM_AEST_H */
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 6/8] ras: aest: Add DT frontend for ARM AEST RAS error sources
2026-05-05 12:23 [PATCH 0/8] ras: aest: extend AEST support to Device Tree frontend Umang Chheda
` (4 preceding siblings ...)
2026-05-05 12:23 ` [PATCH 5/8] dt-bindings: arm: ras: Introduce bindings for ARM AEST Umang Chheda
@ 2026-05-05 12:23 ` Umang Chheda
2026-05-05 12:23 ` [PATCH 7/8] arm64: dts: qcom: lemans: add AEST error nodes Umang Chheda
2026-05-05 12:23 ` [PATCH 8/8] arm64: dts: qcom: monaco: " Umang Chheda
7 siblings, 0 replies; 9+ messages in thread
From: Umang Chheda @ 2026-05-05 12:23 UTC (permalink / raw)
To: Ruidong Tian, Tony Luck, Borislav Petkov, Rob Herring,
Krzysztof Kozlowski, Conor Dooley, Bjorn Andersson, Konrad Dybcio,
catalin.marinas, will, lpieralisi, rafael, mark.rutland,
Sudeep Holla
Cc: linux-arm-msm, linux-acpi, linux-arm-kernel, linux-edac,
linux-kernel, devicetree, linux-edac, Umang Chheda
Add a Device Tree frontend for the Arm AEST RAS framework, allowing the
existing AEST core driver to be used on DT-only systems.
The DT frontend parses the "arm,aest" Device Tree hierarchy and populates
the same internal structures as the ACPI-based implementation. It is
initialized at the same layer as ACPI and is mutually exclusive with it,
ensuring identical behaviour regardless of the firmware interface in use.
Signed-off-by: Umang Chheda <umang.chheda@oss.qualcomm.com>
---
drivers/ras/aest/Kconfig | 15 +-
drivers/ras/aest/Makefile | 2 +
drivers/ras/aest/aest-of.c | 673 +++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 688 insertions(+), 2 deletions(-)
diff --git a/drivers/ras/aest/Kconfig b/drivers/ras/aest/Kconfig
index 0b09a5d5acce..ca034255fadd 100644
--- a/drivers/ras/aest/Kconfig
+++ b/drivers/ras/aest/Kconfig
@@ -7,11 +7,22 @@
config AEST
tristate "ARM AEST Driver"
- depends on ACPI_AEST && RAS
-
+ depends on ACPI_AEST || OF_AEST
+ depends on RAS
help
The Arm Error Source Table (AEST) provides details on ACPI
extensions that enable kernel-first handling of errors in a
system that supports the Armv8 RAS extensions.
If set, the kernel will report and log hardware errors.
+
+config OF_AEST
+ bool "ARM Error Source Table DT Support"
+ depends on ARM64_RAS_EXTN && OF
+ help
+ Enable support for discovering ARM RAS error sources using the
+ Device Tree based Arm Error Source Table (AEST) specification.
+ This allows the kernel to enumerate and manage hardware error
+ reporting blocks described in firmware for ARMv8 and later
+ systems. Select this option if your platform describes AEST
+ nodes in Device Tree and relies on RAS error handling.
diff --git a/drivers/ras/aest/Makefile b/drivers/ras/aest/Makefile
index e5a45fde6d36..2997952901c0 100644
--- a/drivers/ras/aest/Makefile
+++ b/drivers/ras/aest/Makefile
@@ -6,3 +6,5 @@ aest-y := aest-core.o
aest-y += aest-sysfs.o
aest-y += aest-inject.o
aest-y += aest-cmn.o
+
+obj-$(CONFIG_OF_AEST) += aest-of.o
diff --git a/drivers/ras/aest/aest-of.c b/drivers/ras/aest/aest-of.c
new file mode 100644
index 000000000000..939db2c41742
--- /dev/null
+++ b/drivers/ras/aest/aest-of.c
@@ -0,0 +1,673 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ */
+
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/xarray.h>
+#include <linux/acpi_aest.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt) "DT AEST: " fmt
+
+/* State collected while walking the "arm,aest" children. */
+struct dt_aest_priv {
+ struct xarray aest_array; /* aest_hnode pointers, stored at index node_id */
+ u32 node_id; /* next free index; bumped after every successful store */
+};
+
+/*
+ * Child-node compatibles understood by this frontend. The .data member
+ * carries the matching ACPI_AEST_*_ERROR_NODE constant, which
+ * dt_aest_node_type() hands back as the node type.
+ */
+static const struct of_device_id dt_aest_child_match[] = {
+ { .compatible = "arm,aest-processor", .data = (void *)ACPI_AEST_PROCESSOR_ERROR_NODE },
+ { .compatible = "arm,aest-memory", .data = (void *)ACPI_AEST_MEMORY_ERROR_NODE },
+ { .compatible = "arm,aest-smmu", .data = (void *)ACPI_AEST_SMMU_ERROR_NODE },
+ { .compatible = "arm,aest-vendor", .data = (void *)ACPI_AEST_VENDOR_ERROR_NODE },
+ { .compatible = "arm,aest-gic", .data = (void *)ACPI_AEST_GIC_ERROR_NODE },
+ { .compatible = "arm,aest-pcie", .data = (void *)ACPI_AEST_PCIE_ERROR_NODE },
+ { .compatible = "arm,aest-proxy", .data = (void *)ACPI_AEST_PROXY_ERROR_NODE },
+ { }
+};
+
+/*
+ * Translate the compatible of @np into an AEST node type.
+ *
+ * Returns the value stored in the matching dt_aest_child_match[] entry, or
+ * -EINVAL (after logging a warning) when @np matches none of them.
+ */
+static int dt_aest_node_type(struct device_node *np)
+{
+ const struct of_device_id *match;
+
+ match = of_match_node(dt_aest_child_match, np);
+ if (!match) {
+ pr_warn("unknown compatible for %pOF\n", np);
+ return -EINVAL;
+ }
+ /* .data holds an integer constant smuggled through a pointer */
+ return (int)(uintptr_t)match->data;
+}
+
+/*
+ * Allocate an empty aest_hnode of type @node_type with identifier @id.
+ *
+ * The node starts with an empty anode list and a count of 0. Returns NULL
+ * when the allocation fails.
+ */
+static struct aest_hnode *dt_aest_alloc_hnode(int node_type, u32 id)
+{
+ struct aest_hnode *ahnode;
+
+ ahnode = kzalloc_obj(*ahnode, GFP_KERNEL);
+ if (!ahnode)
+ return NULL;
+
+ INIT_LIST_HEAD(&ahnode->list);
+ ahnode->count = 0;
+ ahnode->id = id;
+ ahnode->type = node_type;
+ return ahnode;
+}
+
+/*
+ * Build the interface description of @anode from the properties of @np.
+ *
+ * The interface header and its group-format specific body are allocated as
+ * one kzalloc() buffer. On success the buffer is stored in
+ * anode->interface_hdr, anode->common points at the body's common part, and
+ * anode->record_implemented, anode->status_reporting and
+ * anode->addressing_mode point at the bitmaps inside the body.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, -EINVAL for an
+ * unsupported "arm,group-format", or the of_address_to_resource() error when
+ * an MMIO interface has no usable first "reg" entry. The buffer is freed on
+ * every error path.
+ */
+static int dt_aest_build_interface(struct device_node *np,
+ struct acpi_aest_node *anode)
+{
+ struct acpi_aest_node_interface_header *hdr;
+ struct acpi_aest_node_interface_common *common;
+ struct resource res;
+ struct resource named_res;
+ u32 gfmt = 0, flags = 0, nrec = 1;
+ u32 itype;
+ int ret;
+ size_t body_sz;
+
+ /*
+ * Deduce interface type from the presence and count of reg entries:
+ * no reg -> system-register access (type 0)
+ * 1 range -> memory-mapped access (type 1)
+ * 2+ ranges -> single-record MMIO (type 2)
+ */
+ /*
+ * NOTE(review): the check below counts every "reg" cell, so a node that
+ * lists extra named regions (see the "reg-names" lookups further down)
+ * presumably lands in the single-record branch - confirm intended.
+ */
+ if (!of_property_present(np, "reg"))
+ itype = ACPI_AEST_NODE_SYSTEM_REGISTER;
+ else if (of_property_count_elems_of_size(np, "reg", sizeof(u32)) <=
+ (of_n_addr_cells(np) + of_n_size_cells(np)))
+ itype = ACPI_AEST_NODE_MEMORY_MAPPED;
+ else
+ itype = ACPI_AEST_NODE_SINGLE_RECORD_MEMORY_MAPPED;
+
+ /* Optional properties: return values are ignored, the initialisers are the defaults */
+ of_property_read_u32(np, "arm,group-format", &gfmt);
+ of_property_read_u32(np, "arm,interface-flags", &flags);
+ of_property_read_u32(np, "arm,num-records", &nrec);
+
+ /*
+ * Size the body. Anything other than 16K/64K is sized as 4K here; the
+ * second switch on gfmt below is the one that rejects unknown formats.
+ */
+ switch (gfmt) {
+ case ACPI_AEST_NODE_GROUP_FORMAT_16K:
+ body_sz = sizeof(struct acpi_aest_node_interface_16k);
+ break;
+ case ACPI_AEST_NODE_GROUP_FORMAT_64K:
+ body_sz = sizeof(struct acpi_aest_node_interface_64k);
+ break;
+ default:
+ body_sz = sizeof(struct acpi_aest_node_interface_4k);
+ break;
+ }
+
+ hdr = kzalloc(sizeof(*hdr) + body_sz, GFP_KERNEL);
+ if (!hdr)
+ return -ENOMEM;
+
+ /* Fill header */
+ hdr->type = (u8)itype;
+ hdr->group_format = (u8)gfmt;
+ hdr->flags = flags;
+ hdr->error_record_count = nrec;
+ hdr->error_record_index = 0;
+
+ /* MMIO interfaces take their base address from the first "reg" entry */
+ if (itype != ACPI_AEST_NODE_SYSTEM_REGISTER) {
+ ret = of_address_to_resource(np, 0, &res);
+ if (ret) {
+ pr_err("node %pOF: missing 'reg' for MMIO interface\n", np);
+ kfree(hdr);
+ return ret;
+ }
+ hdr->address = res.start;
+ }
+
+ /*
+ * The body sits directly behind the header (hdr + 1). Read the three
+ * bitmaps into it and let the anode pointers alias them: a single u64
+ * each for 4K, 4 words each for 16K, 14 words each for 64K.
+ */
+ switch (gfmt) {
+ case ACPI_AEST_NODE_GROUP_FORMAT_4K: {
+ struct acpi_aest_node_interface_4k *b =
+ (struct acpi_aest_node_interface_4k *)(hdr + 1);
+ of_property_read_u64(np, "arm,record-impl",
+ &b->error_record_implemented);
+ of_property_read_u64(np, "arm,status-reporting",
+ &b->error_status_reporting);
+ of_property_read_u64(np, "arm,addressing-mode",
+ &b->addressing_mode);
+ common = &b->common;
+ anode->record_implemented =
+ (unsigned long *)&b->error_record_implemented;
+ anode->status_reporting =
+ (unsigned long *)&b->error_status_reporting;
+ anode->addressing_mode =
+ (unsigned long *)&b->addressing_mode;
+ break;
+ }
+ case ACPI_AEST_NODE_GROUP_FORMAT_16K: {
+ struct acpi_aest_node_interface_16k *b =
+ (struct acpi_aest_node_interface_16k *)(hdr + 1);
+ of_property_read_u64_array(np, "arm,record-impl",
+ b->error_record_implemented, 4);
+ of_property_read_u64_array(np, "arm,status-reporting",
+ b->error_status_reporting, 4);
+ of_property_read_u64_array(np, "arm,addressing-mode",
+ b->addressing_mode, 4);
+ common = &b->common;
+ anode->record_implemented =
+ (unsigned long *)b->error_record_implemented;
+ anode->status_reporting =
+ (unsigned long *)b->error_status_reporting;
+ anode->addressing_mode =
+ (unsigned long *)b->addressing_mode;
+ break;
+ }
+ case ACPI_AEST_NODE_GROUP_FORMAT_64K: {
+ struct acpi_aest_node_interface_64k *b =
+ (struct acpi_aest_node_interface_64k *)(hdr + 1);
+ of_property_read_u64_array(np, "arm,record-impl",
+ b->error_record_implemented, 14);
+ of_property_read_u64_array(np, "arm,status-reporting",
+ b->error_status_reporting, 14);
+ of_property_read_u64_array(np, "arm,addressing-mode",
+ b->addressing_mode, 14);
+ common = &b->common;
+ anode->record_implemented =
+ (unsigned long *)b->error_record_implemented;
+ anode->status_reporting =
+ (unsigned long *)b->error_status_reporting;
+ anode->addressing_mode =
+ (unsigned long *)b->addressing_mode;
+ break;
+ }
+ default:
+ pr_err("node %pOF: unsupported group-format %u\n", np, gfmt);
+ kfree(hdr);
+ return -EINVAL;
+ }
+
+ /*
+ * Optional register blocks, located through "reg-names". A base is only
+ * recorded when the lookup succeeds; otherwise the field keeps the zero
+ * it got from kzalloc().
+ *
+ * NOTE(review): of_property_match_string() returns a negative errno when
+ * the name is absent and that value is passed on as the index, so this
+ * relies on of_address_to_resource() failing for a negative index -
+ * confirm.
+ */
+ if (!of_address_to_resource(np, of_property_match_string(
+ np, "reg-names", "fault-inject"), &named_res))
+ common->fault_inject_register_base = named_res.start;
+
+ if (!of_address_to_resource(np, of_property_match_string(
+ np, "reg-names", "err-group"), &named_res))
+ common->error_group_register_base = named_res.start;
+
+ if (!of_address_to_resource(np, of_property_match_string(
+ np, "reg-names", "irq-config"), &named_res))
+ common->interrupt_config_register_base = named_res.start;
+
+ anode->interface_hdr = hdr;
+ anode->common = common;
+
+ return 0;
+}
+
+/*
+ * Look up the interrupt called @want on @np.
+ *
+ * @idx is the position at which @want is expected in "interrupt-names" and
+ * @label is the short name used in the diagnostic; both are only used for
+ * the warning text.
+ *
+ * Returns the Linux IRQ number (> 0), -EPROBE_DEFER when the interrupt
+ * controller is not ready yet, or 0 when the node has no usable interrupt
+ * of that name.
+ *
+ * of_irq_get_byname() reports -EINVAL when "interrupt-names" is missing and
+ * -ENODATA when the property exists but does not list @want. Both simply
+ * mean the optional interrupt is not described, so neither is worth a
+ * warning. Every other failure, including a return value of 0 (the mapping
+ * itself failed), is logged.
+ */
+static int dt_aest_named_irq(struct device_node *np, const char *want,
+			     int idx, const char *label)
+{
+	const char *name = NULL;
+	int irq;
+
+	irq = of_irq_get_byname(np, want);
+	if (irq > 0 || irq == -EPROBE_DEFER)
+		return irq;
+
+	if (irq == -EINVAL || irq == -ENODATA)
+		return 0;
+
+	/* Show what the DT actually has at the expected position */
+	of_property_read_string_index(np, "interrupt-names", idx, &name);
+	pr_warn("node %pOF: failed to map %s IRQ: %d (interrupt-names[%d]=\"%s\", want \"%s\")\n",
+		np, label, irq, idx, name ?: "<missing>", want);
+	return 0;
+}
+
+/*
+ * Build the interrupt array of @anode from the "fhi" and "eri" interrupts
+ * of @np.
+ *
+ * Both interrupts are optional. When neither is usable, anode->interrupt is
+ * set to NULL and anode->interrupt_count to 0. Otherwise an array with one
+ * entry per usable interrupt is allocated; each entry records the Linux IRQ
+ * number in .gsiv, the flags from "arm,fhi-flags" / "arm,eri-flags" (the
+ * FHI entry additionally gets AEST_INTERRUPT_MODE) and the interrupt type.
+ *
+ * Returns 0 on success, -EPROBE_DEFER if either lookup asks for deferral,
+ * or -ENOMEM if the array cannot be allocated.
+ */
+static int dt_aest_build_interrupt(struct device_node *np,
+				   struct acpi_aest_node *anode)
+{
+	struct acpi_aest_node_interrupt_v2 *irq_arr;
+	int fhi_irq, eri_irq, count = 0;
+	u32 fhi_flags = 0, eri_flags = 0;
+
+	/* Optional; the flags stay 0 when the property is absent */
+	of_property_read_u32(np, "arm,fhi-flags", &fhi_flags);
+	of_property_read_u32(np, "arm,eri-flags", &eri_flags);
+
+	fhi_irq = dt_aest_named_irq(np, "fhi", 0, "FHI");
+	if (fhi_irq < 0)
+		return fhi_irq;
+
+	eri_irq = dt_aest_named_irq(np, "eri", 1, "ERI");
+	if (eri_irq < 0)
+		return eri_irq;
+
+	if (fhi_irq > 0)
+		count++;
+	if (eri_irq > 0)
+		count++;
+
+	if (!count) {
+		anode->interrupt = NULL;
+		anode->interrupt_count = 0;
+		return 0;
+	}
+
+	irq_arr = kcalloc(count, sizeof(*irq_arr), GFP_KERNEL);
+	if (!irq_arr)
+		return -ENOMEM;
+
+	/* Reuse count as the fill index: FHI first, then ERI */
+	count = 0;
+	if (fhi_irq > 0) {
+		irq_arr[count].gsiv = fhi_irq;
+		irq_arr[count].flags = AEST_INTERRUPT_MODE | fhi_flags;
+		irq_arr[count].type = ACPI_AEST_NODE_FAULT_HANDLING;
+		count++;
+	}
+	if (eri_irq > 0) {
+		irq_arr[count].gsiv = eri_irq;
+		irq_arr[count].flags = eri_flags;
+		irq_arr[count].type = ACPI_AEST_NODE_ERROR_RECOVERY;
+		count++;
+	}
+
+	anode->interrupt = irq_arr;
+	anode->interrupt_count = count;
+	return 0;
+}
+
+/*
+ * Allocate and fill the type-specific part of @anode for @node_type.
+ *
+ * Every property read here is optional: the read helpers' return values are
+ * ignored and a missing property leaves the zero from kzalloc in place (the
+ * vendor HID falls back to "ARMHC000"). Processor nodes get two
+ * allocations: the processor structure itself and a cache, TLB or generic
+ * sub-structure chosen by "arm,resource-type". Proxy nodes carry no
+ * specific data.
+ *
+ * Returns 0 on success, -ENOMEM when an allocation fails (a processor
+ * structure allocated earlier in the same call is freed first), or -EINVAL
+ * for an unknown @node_type.
+ */
+static int dt_aest_build_node_specific(struct device_node *np,
+ struct acpi_aest_node *anode,
+ int node_type)
+{
+ switch (node_type) {
+
+ case ACPI_AEST_PROCESSOR_ERROR_NODE: {
+ struct acpi_aest_processor *proc;
+ u32 rtype = 0, pflags = 0;
+
+ proc = kzalloc_obj(*proc, GFP_KERNEL);
+ if (!proc)
+ return -ENOMEM;
+
+ of_property_read_u32(np, "arm,resource-type", &rtype);
+ of_property_read_u32(np, "arm,processor-flags", &pflags);
+
+ /* Both values are narrowed to the 8-bit structure fields */
+ proc->resource_type = (u8)rtype;
+ proc->flags = (u8)pflags;
+
+ /* Processor cache/TLB/generic sub-structure */
+ switch (rtype) {
+ case ACPI_AEST_CACHE_RESOURCE: {
+ struct acpi_aest_processor_cache *c;
+ struct device_node *cache_np;
+
+ c = kzalloc_obj(*c, GFP_KERNEL);
+ if (!c) {
+ kfree(proc);
+ return -ENOMEM;
+ }
+
+ /* The cache is identified by the phandle of the referenced node */
+ cache_np = of_parse_phandle(np, "arm,cache-ref", 0);
+ if (cache_np) {
+ c->cache_reference = cache_np->phandle;
+ of_node_put(cache_np);
+ }
+ anode->cache = c;
+ break;
+ }
+ case ACPI_AEST_TLB_RESOURCE: {
+ struct acpi_aest_processor_tlb *t;
+
+ t = kzalloc_obj(*t, GFP_KERNEL);
+ if (!t) {
+ kfree(proc);
+ return -ENOMEM;
+ }
+ of_property_read_u32(np, "arm,tlb-level",
+ &t->tlb_level);
+ anode->tlb = t;
+ break;
+ }
+ default: {
+ struct acpi_aest_processor_generic *g;
+
+ g = kzalloc_obj(*g, GFP_KERNEL);
+ if (!g) {
+ kfree(proc);
+ return -ENOMEM;
+ }
+ of_property_read_u32(np, "arm,resource-ref",
+ &g->resource);
+ anode->generic = g;
+ break;
+ }
+ }
+ anode->processor = proc;
+ break;
+ }
+
+ case ACPI_AEST_MEMORY_ERROR_NODE: {
+ struct acpi_aest_memory *mem;
+
+ mem = kzalloc_obj(*mem, GFP_KERNEL);
+
+ if (!mem)
+ return -ENOMEM;
+ of_property_read_u32(np, "arm,proximity-domain",
+ &mem->srat_proximity_domain);
+ anode->memory = mem;
+ break;
+ }
+
+ case ACPI_AEST_SMMU_ERROR_NODE: {
+ struct acpi_aest_smmu *smmu;
+ struct device_node *smmu_np;
+
+ smmu = kzalloc_obj(*smmu, GFP_KERNEL);
+
+ if (!smmu)
+ return -ENOMEM;
+ smmu_np = of_parse_phandle(np, "arm,smmu-ref", 0);
+ if (smmu_np) {
+ /* Use the phandle of the referenced SMMU node as the IORT reference */
+ smmu->iort_node_reference = smmu_np->phandle;
+ of_node_put(smmu_np);
+ }
+ of_property_read_u32(np, "arm,smmu-subcomponent",
+ &smmu->subcomponent_reference);
+ anode->smmu = smmu;
+ break;
+ }
+
+ case ACPI_AEST_VENDOR_ERROR_NODE: {
+ struct acpi_aest_vendor_v2 *vendor;
+ const char *hid = "ARMHC000";
+
+ vendor = kzalloc_obj(*vendor, GFP_KERNEL);
+
+ if (!vendor)
+ return -ENOMEM;
+ of_property_read_string(np, "arm,vendor-hid", &hid);
+ /*
+ * NOTE(review): strscpy() always NUL-terminates, so if acpi_hid is
+ * an 8-byte field the 8-character default above would be cut to 7
+ * characters - confirm the field size.
+ */
+ strscpy(vendor->acpi_hid, hid, sizeof(vendor->acpi_hid));
+ of_property_read_u32(np, "arm,vendor-uid",
+ &vendor->acpi_uid);
+ anode->vendor = vendor;
+ break;
+ }
+
+ case ACPI_AEST_GIC_ERROR_NODE: {
+ struct acpi_aest_gic *gic;
+
+ gic = kzalloc_obj(*gic, GFP_KERNEL);
+
+ if (!gic)
+ return -ENOMEM;
+ of_property_read_u32(np, "arm,gic-type",
+ &gic->interface_type);
+ of_property_read_u32(np, "arm,gic-instance",
+ &gic->instance_id);
+ anode->gic = gic;
+ break;
+ }
+
+ case ACPI_AEST_PCIE_ERROR_NODE: {
+ struct acpi_aest_pcie *pcie;
+
+ pcie = kzalloc_obj(*pcie, GFP_KERNEL);
+
+ if (!pcie)
+ return -ENOMEM;
+ /* The PCIe segment number is stored in the IORT reference field */
+ of_property_read_u32(np, "arm,pcie-segment",
+ &pcie->iort_node_reference);
+ anode->pcie = pcie;
+ break;
+ }
+
+ case ACPI_AEST_PROXY_ERROR_NODE:
+ /* No node-specific data for proxy nodes */
+ anode->spec_pointer = NULL;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Allocate an acpi_aest_node for @np and populate it in three steps:
+ * interface, node-specific data, interrupts.
+ *
+ * Returns the new node, or an ERR_PTR() carrying the error of the step that
+ * failed (-ENOMEM if the node itself cannot be allocated). On failure the
+ * interface header, the node-specific structure and the processor
+ * sub-structure are freed together with the node.
+ */
+static struct acpi_aest_node *
+dt_aest_alloc_anode(struct device_node *np, int node_type)
+{
+ struct acpi_aest_node *anode;
+ int ret;
+
+ anode = kzalloc_obj(*anode, GFP_KERNEL);
+ if (!anode)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&anode->list);
+ anode->type = node_type;
+
+ ret = dt_aest_build_interface(np, anode);
+ if (ret)
+ goto err_free;
+
+ ret = dt_aest_build_node_specific(np, anode, node_type);
+ if (ret)
+ goto err_free;
+
+ /*
+ * Interrupts come last: dt_aest_build_interrupt() only assigns
+ * anode->interrupt on success, so err_free has no array to release.
+ */
+ ret = dt_aest_build_interrupt(np, anode);
+ if (ret)
+ goto err_free;
+
+ return anode;
+
+err_free:
+ /* anode was zero-allocated, so pointers not yet set are NULL here */
+ kfree(anode->interface_hdr);
+ kfree(anode->spec_pointer);
+ kfree(anode->processor_spec_pointer);
+ kfree(anode);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Free @anode together with every buffer the dt_aest_build_*() helpers
+ * attached to it. Pointers that were never set are NULL (the node is
+ * zero-allocated), which kfree() accepts.
+ */
+static void dt_aest_free_anode(struct acpi_aest_node *anode)
+{
+	kfree(anode->interrupt);
+	kfree(anode->interface_hdr);
+	kfree(anode->spec_pointer);
+	kfree(anode->processor_spec_pointer);
+	kfree(anode);
+}
+
+/*
+ * Parse one child of the "arm,aest" node and record it in @priv.
+ *
+ * A child with an unknown compatible is skipped with a warning and is not
+ * an error. For every other child an aest_hnode holding a single
+ * acpi_aest_node is created and stored in priv->aest_array at index
+ * priv->node_id, which is then incremented.
+ *
+ * Returns 0 on success or when the child is skipped, -ENOMEM when an
+ * allocation fails, or the error reported by dt_aest_alloc_anode() or
+ * xa_store(). Nothing allocated here is left behind on failure.
+ */
+static int dt_aest_init_one_node(struct device_node *np,
+				 struct dt_aest_priv *priv)
+{
+	int node_type;
+	int ret;
+	struct aest_hnode *ahnode;
+	struct acpi_aest_node *anode;
+
+	node_type = dt_aest_node_type(np);
+	if (node_type < 0) {
+		pr_warn("unknown node type for %pOF, skipping\n", np);
+		return 0;
+	}
+
+	ahnode = dt_aest_alloc_hnode(node_type, priv->node_id);
+	if (!ahnode)
+		return -ENOMEM;
+
+	anode = dt_aest_alloc_anode(np, node_type);
+	if (IS_ERR(anode)) {
+		kfree(ahnode);
+		return PTR_ERR(anode);
+	}
+
+	list_add_tail(&anode->list, &ahnode->list);
+	ahnode->count = 1;
+
+	ret = xa_err(xa_store(&priv->aest_array, priv->node_id,
+			      ahnode, GFP_KERNEL));
+	if (ret) {
+		/*
+		 * The anode owns several separately allocated buffers by now;
+		 * a bare kfree(anode) would leak all of them.
+		 */
+		dt_aest_free_anode(anode);
+		kfree(ahnode);
+		return ret;
+	}
+	priv->node_id++;
+	return 0;
+}
+
+/*
+ * Run dt_aest_init_one_node() on every available child of @aest_root.
+ *
+ * Stops at the first child that fails and returns its error; returns 0 when
+ * all children were processed. Nodes stored in @priv before a failure are
+ * left in place.
+ */
+static int dt_aest_init_nodes(struct device_node *aest_root,
+ struct dt_aest_priv *priv)
+{
+ struct device_node *np;
+ int ret;
+
+ for_each_available_child_of_node(aest_root, np) {
+ ret = dt_aest_init_one_node(np, priv);
+ if (ret) {
+ pr_err("failed to init node %pOF: %d\n", np, ret);
+ /* Leaving the loop early: release the current child */
+ of_node_put(np);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Create and register the "AEST" platform device number @index for @ahnode.
+ *
+ * Resources handed to the device:
+ * - one IORESOURCE_MEM entry per anode whose interface is not
+ * system-register based, starting at the interface address and sized by
+ * the group format (4K, 16K or 64K; unknown formats get 4K);
+ * - one IORESOURCE_IRQ entry per interrupt, named after its type and
+ * carrying the Linux virq.
+ * The platform data is a copy of the @ahnode pointer.
+ *
+ * Side effect: intr->gsiv of every processed interrupt is overwritten with
+ * the hardware IRQ number when irq data is available for the virq.
+ *
+ * Returns the registered device, or an ERR_PTR() on failure; the device
+ * reference is dropped on every error path.
+ */
+static struct platform_device *dt_aest_alloc_pdev(struct aest_hnode *ahnode,
+ int index)
+{
+ struct platform_device *pdev;
+ struct resource *res;
+ struct acpi_aest_node *anode;
+ int ret, size, j;
+ /* Last virq emitted per interrupt type; used to skip repeats */
+ int irq[AEST_MAX_INTERRUPT_PER_NODE] = { 0 };
+
+ pdev = platform_device_alloc("AEST", index);
+ if (!pdev)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * NOTE(review): room for one MEM entry per anode plus
+ * AEST_MAX_INTERRUPT_PER_NODE IRQ entries in total. Presumably enough
+ * because this frontend builds hnodes with a single anode - confirm
+ * before reusing with longer lists.
+ */
+ res = kcalloc(ahnode->count + AEST_MAX_INTERRUPT_PER_NODE,
+ sizeof(*res), GFP_KERNEL);
+ if (!res) {
+ platform_device_put(pdev);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* j is the number of resource entries filled so far */
+ j = 0;
+ list_for_each_entry(anode, &ahnode->list, list) {
+ if (anode->interface_hdr->type !=
+ ACPI_AEST_NODE_SYSTEM_REGISTER) {
+ res[j].name = AEST_NODE_NAME;
+ res[j].start = anode->interface_hdr->address;
+
+ switch (anode->interface_hdr->group_format) {
+ case ACPI_AEST_NODE_GROUP_FORMAT_4K:
+ size = 4 * KB; break;
+ case ACPI_AEST_NODE_GROUP_FORMAT_16K:
+ size = 16 * KB; break;
+ case ACPI_AEST_NODE_GROUP_FORMAT_64K:
+ size = 64 * KB; break;
+ default:
+ size = 4 * KB;
+ }
+ res[j].end = res[j].start + size - 1;
+ res[j].flags = IORESOURCE_MEM;
+ j++;
+ }
+
+ if (anode->interrupt && anode->interrupt_count > 0) {
+ int k;
+
+ for (k = 0; k < anode->interrupt_count &&
+ k < AEST_MAX_INTERRUPT_PER_NODE; k++) {
+
+ struct acpi_aest_node_interrupt_v2 *intr =
+ &anode->interrupt[k];
+ int itype = intr->type;
+ int virq = intr->gsiv;
+ struct irq_data *irqd;
+
+ /* Skip unset IRQs, types outside irq[], and repeats */
+ if (!virq)
+ continue;
+ if (itype >= AEST_MAX_INTERRUPT_PER_NODE)
+ continue;
+ if (irq[itype] == virq)
+ continue;
+ irq[itype] = virq;
+ /*
+ * aest_config_irq() writes intr->gsiv directly
+ * to the hardware IRQ-config register, so it
+ * must hold the GIC hardware SPI number, not the
+ * Linux virtual IRQ. Convert here now that we
+ * have the virq in hand; the resource still gets
+ * the virq so devm_request_irq() works correctly.
+ */
+ irqd = irq_get_irq_data(virq);
+ if (irqd)
+ intr->gsiv = irqd->hwirq;
+
+ res[j].name = (itype == ACPI_AEST_NODE_FAULT_HANDLING)
+ ? AEST_FHI_NAME : AEST_ERI_NAME;
+ res[j].start = virq;
+ res[j].end = virq;
+ res[j].flags = IORESOURCE_IRQ;
+ j++;
+ }
+ }
+ }
+
+ /* The local array is released right after the call, whatever its result */
+ ret = platform_device_add_resources(pdev, res, j);
+ kfree(res);
+ if (ret) {
+ platform_device_put(pdev);
+ return ERR_PTR(ret);
+ }
+
+ /* Platform data is the pointer value itself, not the hnode contents */
+ ret = platform_device_add_data(pdev, &ahnode, sizeof(ahnode));
+ if (ret) {
+ platform_device_put(pdev);
+ return ERR_PTR(ret);
+ }
+
+ ret = platform_device_add(pdev);
+ if (ret) {
+ platform_device_put(pdev);
+ return ERR_PTR(ret);
+ }
+
+ return pdev;
+}
+
+/*
+ * Register one platform device for every hnode stored in @priv, numbering
+ * the devices consecutively from 0.
+ *
+ * Returns 0 when all devices were registered. On the first failure the
+ * loop stops and that error is returned; devices registered before it are
+ * not removed here.
+ */
+static int dt_aest_alloc_pdevs(struct dt_aest_priv *priv)
+{
+ struct aest_hnode *ahnode;
+ unsigned long i;
+ int ret = 0, index = 0;
+
+ xa_for_each(&priv->aest_array, i, ahnode) {
+ struct platform_device *pdev =
+ dt_aest_alloc_pdev(ahnode, index++);
+ if (IS_ERR(pdev)) {
+ ret = PTR_ERR(pdev);
+ pr_err("failed to alloc pdev for node %u: %d\n",
+ ahnode->id, ret);
+ break;
+ }
+ }
+ return ret;
+}
+
+/*
+ * Entry point of the DT frontend.
+ *
+ * Does nothing (and returns 0) when ACPI is enabled or when the device tree
+ * has no "arm,aest" node. Otherwise every available child of that node is
+ * parsed into an aest_hnode and one "AEST" platform device is registered
+ * per hnode.
+ *
+ * Returns 0 on success, or the error from parsing or device registration.
+ */
+static int __init dt_aest_init(void)
+{
+	struct device_node *aest_root;
+	struct dt_aest_priv priv = {};
+	int ret;
+
+	if (!acpi_disabled)
+		return 0;
+
+	aest_root = of_find_compatible_node(NULL, NULL, "arm,aest");
+	if (!aest_root)
+		return 0;
+
+	xa_init(&priv.aest_array);
+
+	ret = dt_aest_init_nodes(aest_root, &priv);
+	of_node_put(aest_root);
+	if (ret) {
+		pr_err("failed to init AEST nodes: %d\n", ret);
+		goto out;
+	}
+
+	ret = dt_aest_alloc_pdevs(&priv);
+	if (ret) {
+		pr_err("failed to alloc AEST pdevs: %d\n", ret);
+		goto out;
+	}
+
+	pr_info("registered %u AEST error source(s) from DT\n", priv.node_id);
+
+out:
+	/*
+	 * The xarray lives on this stack frame and is only a staging area, so
+	 * its internal nodes must be released before returning. xa_destroy()
+	 * does not free the stored entries: each registered platform device
+	 * keeps its own copy of the hnode pointer as platform data.
+	 * NOTE(review): hnodes parsed before a failure are still not freed on
+	 * the error paths.
+	 */
+	xa_destroy(&priv.aest_array);
+	return ret;
+}
+subsys_initcall_sync(dt_aest_init);
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 7/8] arm64: dts: qcom: lemans: add AEST error nodes
2026-05-05 12:23 [PATCH 0/8] ras: aest: extend AEST support to Device Tree frontend Umang Chheda
` (5 preceding siblings ...)
2026-05-05 12:23 ` [PATCH 6/8] ras: aest: Add DT frontend for ARM AEST RAS error sources Umang Chheda
@ 2026-05-05 12:23 ` Umang Chheda
2026-05-05 12:23 ` [PATCH 8/8] arm64: dts: qcom: monaco: " Umang Chheda
7 siblings, 0 replies; 9+ messages in thread
From: Umang Chheda @ 2026-05-05 12:23 UTC (permalink / raw)
To: Ruidong Tian, Tony Luck, Borislav Petkov, Rob Herring,
Krzysztof Kozlowski, Conor Dooley, Bjorn Andersson, Konrad Dybcio,
catalin.marinas, will, lpieralisi, rafael, mark.rutland,
Sudeep Holla
Cc: linux-arm-msm, linux-acpi, linux-arm-kernel, linux-edac,
linux-kernel, devicetree, linux-edac, Umang Chheda,
Faruque Ansari
Add AEST RAS error source nodes for the Lemans SoC.
The DT describes a processor error source covering all CPU cores and one
shared L3 cache error source for each of the two clusters. These nodes model the
hardware error reporting blocks and associated interrupts as required
by the Arm AEST specification.
Co-developed-by: Faruque Ansari <faruque.ansari@oss.qualcomm.com>
Signed-off-by: Faruque Ansari <faruque.ansari@oss.qualcomm.com>
Signed-off-by: Umang Chheda <umang.chheda@oss.qualcomm.com>
---
arch/arm64/boot/dts/qcom/lemans.dtsi | 41 ++++++++++++++++++++++++++++++++++++
1 file changed, 41 insertions(+)
diff --git a/arch/arm64/boot/dts/qcom/lemans.dtsi b/arch/arm64/boot/dts/qcom/lemans.dtsi
index fe6e76351823..199ea1f9a8d5 100644
--- a/arch/arm64/boot/dts/qcom/lemans.dtsi
+++ b/arch/arm64/boot/dts/qcom/lemans.dtsi
@@ -4,6 +4,7 @@
* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
+#include <dt-bindings/arm/aest.h>
#include <dt-bindings/interconnect/qcom,icc.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/clock/qcom,dsi-phy-28nm.h>
@@ -29,6 +30,46 @@ / {
#address-cells = <2>;
#size-cells = <2>;
+ aest {
+ compatible = "arm,aest";
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ aest-processor-0 {
+ compatible = "arm,aest-processor";
+ arm,num-records = <1>;
+ arm,record-impl = /bits/ 64 <0x0>;
+ arm,status-reporting = /bits/ 64 <0x0>;
+ arm,addressing-mode = /bits/ 64 <0x0>;
+ arm,processor-flags = <AEST_PROC_GLOBAL>;
+ interrupts = <GIC_PPI 0 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-names = "fhi";
+ };
+
+ aest-l3-cluster0 {
+ compatible = "arm,aest-processor";
+ arm,num-records = <2>;
+ arm,record-impl = /bits/ 64 <0x1>;
+ arm,status-reporting = /bits/ 64 <0x0>;
+ arm,addressing-mode = /bits/ 64 <0x0>;
+ arm,processor-flags = <AEST_PROC_SHARED>;
+ interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "fhi";
+ };
+
+ aest-l3-cluster1 {
+ compatible = "arm,aest-processor";
+ arm,num-records = <2>;
+ arm,record-impl = /bits/ 64 <0x1>;
+ arm,status-reporting = /bits/ 64 <0x0>;
+ arm,addressing-mode = /bits/ 64 <0x0>;
+ arm,processor-flags = <AEST_PROC_SHARED>;
+ interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "fhi";
+ };
+ };
+
clocks {
xo_board_clk: xo-board-clk {
compatible = "fixed-clock";
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 8/8] arm64: dts: qcom: monaco: add AEST error nodes
2026-05-05 12:23 [PATCH 0/8] ras: aest: extend AEST support to Device Tree frontend Umang Chheda
` (6 preceding siblings ...)
2026-05-05 12:23 ` [PATCH 7/8] arm64: dts: qcom: lemans: add AEST error nodes Umang Chheda
@ 2026-05-05 12:23 ` Umang Chheda
7 siblings, 0 replies; 9+ messages in thread
From: Umang Chheda @ 2026-05-05 12:23 UTC (permalink / raw)
To: Ruidong Tian, Tony Luck, Borislav Petkov, Rob Herring,
Krzysztof Kozlowski, Conor Dooley, Bjorn Andersson, Konrad Dybcio,
catalin.marinas, will, lpieralisi, rafael, mark.rutland,
Sudeep Holla
Cc: linux-arm-msm, linux-acpi, linux-arm-kernel, linux-edac,
linux-kernel, devicetree, linux-edac, Umang Chheda,
Faruque Ansari
Add AEST RAS error source nodes for the Monaco SoC.
The DT describes a processor error source covering all CPU cores and one
shared L3 cache error source for each of the two clusters. These nodes model the
hardware error reporting blocks and associated interrupts as required
by the Arm AEST specification.
Co-developed-by: Faruque Ansari <faruque.ansari@oss.qualcomm.com>
Signed-off-by: Faruque Ansari <faruque.ansari@oss.qualcomm.com>
Signed-off-by: Umang Chheda <umang.chheda@oss.qualcomm.com>
---
arch/arm64/boot/dts/qcom/monaco.dtsi | 41 ++++++++++++++++++++++++++++++++++++
1 file changed, 41 insertions(+)
diff --git a/arch/arm64/boot/dts/qcom/monaco.dtsi b/arch/arm64/boot/dts/qcom/monaco.dtsi
index 7b1d57460f1e..8e43ceed7d84 100644
--- a/arch/arm64/boot/dts/qcom/monaco.dtsi
+++ b/arch/arm64/boot/dts/qcom/monaco.dtsi
@@ -3,6 +3,7 @@
* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
+#include <dt-bindings/arm/aest.h>
#include <dt-bindings/clock/qcom,dsi-phy-28nm.h>
#include <dt-bindings/clock/qcom,qcs8300-gcc.h>
#include <dt-bindings/clock/qcom,rpmh.h>
@@ -29,6 +30,46 @@ / {
#address-cells = <2>;
#size-cells = <2>;
+ aest {
+ compatible = "arm,aest";
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ aest-processor-0 {
+ compatible = "arm,aest-processor";
+ arm,num-records = <1>;
+ arm,record-impl = /bits/ 64 <0x0>;
+ arm,status-reporting = /bits/ 64 <0x0>;
+ arm,addressing-mode = /bits/ 64 <0x0>;
+ arm,processor-flags = <AEST_PROC_GLOBAL>;
+ interrupts = <GIC_PPI 0 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-names = "fhi";
+ };
+
+ aest-l3-cluster0 {
+ compatible = "arm,aest-processor";
+ arm,num-records = <2>;
+ arm,record-impl = /bits/ 64 <0x1>;
+ arm,status-reporting = /bits/ 64 <0x0>;
+ arm,addressing-mode = /bits/ 64 <0x0>;
+ arm,processor-flags = <AEST_PROC_SHARED>;
+ interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "fhi";
+ };
+
+ aest-l3-cluster1 {
+ compatible = "arm,aest-processor";
+ arm,num-records = <2>;
+ arm,record-impl = /bits/ 64 <0x1>;
+ arm,status-reporting = /bits/ 64 <0x0>;
+ arm,addressing-mode = /bits/ 64 <0x0>;
+ arm,processor-flags = <AEST_PROC_SHARED>;
+ interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "fhi";
+ };
+ };
+
clocks {
xo_board_clk: xo-board-clk {
compatible = "fixed-clock";
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread