From: Kai-Heng Feng <kaihengf@nvidia.com>
To: rafael@kernel.org, shuah@kernel.org, kees@kernel.org
Cc: csoto@nvidia.com, mochs@nvidia.com,
Kai-Heng Feng <kaihengf@nvidia.com>,
Tony Luck <tony.luck@intel.com>, Borislav Petkov <bp@alien8.de>,
Hanjun Guo <guohanjun@huawei.com>,
Mauro Carvalho Chehab <mchehab@kernel.org>,
Shuai Xue <xueshuai@linux.alibaba.com>,
Len Brown <lenb@kernel.org>,
"Gustavo A. R. Silva" <gustavoars@kernel.org>,
linux-kernel@vger.kernel.org, linux-acpi@vger.kernel.org,
linux-hardening@vger.kernel.org
Subject: [PATCH v1 1/4] ACPI: APEI: GHES: Refactor Grace decoder helpers
Date: Fri, 12 Jun 2026 20:09:06 +0800 [thread overview]
Message-ID: <20260612120929.28965-2-kaihengf@nvidia.com> (raw)
In-Reply-To: <20260612120929.28965-1-kaihengf@nvidia.com>
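Split the Grace CPER processing into a separate decode step and a
print step so that the parser can be exercised by KUnit without a live
ACPI device. Introduce ghes-nvidia.h to hold the shared types, which
the Vera decoder added in the next patch will also use. While at it,
rename nvidia_sec_guid to nvidia_grace_sec_guid.

The section is now fully validated before anything is printed, so a
record whose number_regs does not fit in the section is reported as
malformed instead of having its header fields printed first.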
Signed-off-by: Kai-Heng Feng <kaihengf@nvidia.com>
---
drivers/acpi/apei/ghes-nvidia.c | 148 +++++++++++++++++++++-----------
drivers/acpi/apei/ghes-nvidia.h | 38 ++++++++
2 files changed, 137 insertions(+), 49 deletions(-)
create mode 100644 drivers/acpi/apei/ghes-nvidia.h
diff --git a/drivers/acpi/apei/ghes-nvidia.c b/drivers/acpi/apei/ghes-nvidia.c
index 597275d81de8..af445152def0 100644
--- a/drivers/acpi/apei/ghes-nvidia.c
+++ b/drivers/acpi/apei/ghes-nvidia.c
@@ -12,7 +12,10 @@
#include <linux/uuid.h>
#include <acpi/ghes.h>
-static const guid_t nvidia_sec_guid =
+#include <kunit/visibility.h>
+#include "ghes-nvidia.h"
+
+static const guid_t nvidia_grace_sec_guid =
GUID_INIT(0x6d5244f2, 0x2712, 0x11ec,
0xbe, 0xa7, 0xcb, 0x3f, 0xdb, 0x95, 0xc7, 0x86);
@@ -25,10 +28,7 @@ struct cper_sec_nvidia {
u8 number_regs;
u8 reserved;
__le64 instance_base;
- struct {
- __le64 addr;
- __le64 val;
- } regs[] __counted_by(number_regs);
+ struct nvidia_ghes_grace_reg regs[] __counted_by(number_regs);
};
struct nvidia_ghes_private {
@@ -36,73 +36,123 @@ struct nvidia_ghes_private {
struct device *dev;
};
-static void nvidia_ghes_print_error(struct device *dev,
- const struct cper_sec_nvidia *nvidia_err,
- size_t error_data_length, bool fatal)
+VISIBLE_IF_KUNIT
+int nvidia_ghes_decode_grace(struct device *dev, const void *buf,
+ size_t len,
+ struct nvidia_ghes_decoded *decoded)
{
- const char *level = fatal ? KERN_ERR : KERN_INFO;
+ const struct cper_sec_nvidia *nvidia_err = buf;
size_t min_size;
- dev_printk(level, dev, "signature: %.16s\n", nvidia_err->signature);
- dev_printk(level, dev, "error_type: %u\n", le16_to_cpu(nvidia_err->error_type));
- dev_printk(level, dev, "error_instance: %u\n", le16_to_cpu(nvidia_err->error_instance));
- dev_printk(level, dev, "severity: %u\n", nvidia_err->severity);
- dev_printk(level, dev, "socket: %u\n", nvidia_err->socket);
- dev_printk(level, dev, "number_regs: %u\n", nvidia_err->number_regs);
- dev_printk(level, dev, "instance_base: 0x%016llx\n",
- le64_to_cpu(nvidia_err->instance_base));
-
- if (nvidia_err->number_regs == 0)
- return;
-
- /*
- * Validate that all registers fit within error_data_length.
- * Each register pair is two little-endian u64s.
- */
+ if (!buf || !decoded)
+ return -EINVAL;
+ if (len < sizeof(*nvidia_err)) {
+ if (dev)
+ dev_err(dev, "Section too small (%zu < %zu)\n",
+ len, sizeof(*nvidia_err));
+ return -ENODATA;
+ }
+
min_size = struct_size(nvidia_err, regs, nvidia_err->number_regs);
- if (error_data_length < min_size) {
- dev_err(dev, "Invalid number_regs %u (section size %zu, need %zu)\n",
- nvidia_err->number_regs, error_data_length, min_size);
- return;
+ if (len < min_size) {
+ if (dev)
+ dev_err(dev,
+ "Invalid number_regs %u (section size %zu, need %zu)\n",
+ nvidia_err->number_regs, len, min_size);
+ return -ENODATA;
}
- for (int i = 0; i < nvidia_err->number_regs; i++)
+ memset(decoded, 0, sizeof(*decoded));
+ decoded->format = NVIDIA_GHES_FORMAT_GRACE;
+ memcpy(decoded->signature, nvidia_err->signature, sizeof(nvidia_err->signature));
+ decoded->signature[sizeof(nvidia_err->signature)] = '\0';
+ decoded->error_type = le16_to_cpu(nvidia_err->error_type);
+ decoded->error_instance = le16_to_cpu(nvidia_err->error_instance);
+ decoded->severity = nvidia_err->severity;
+ decoded->socket = nvidia_err->socket;
+ decoded->number_regs = nvidia_err->number_regs;
+ decoded->instance_base = le64_to_cpu(nvidia_err->instance_base);
+ if (nvidia_err->number_regs)
+ decoded->grace_regs = nvidia_err->regs;
+
+ return 0;
+}
+EXPORT_SYMBOL_IF_KUNIT(nvidia_ghes_decode_grace);
+
+VISIBLE_IF_KUNIT
+int nvidia_ghes_grace_reg_pair(const struct nvidia_ghes_decoded *decoded,
+ unsigned int index, u64 *addr, u64 *val)
+{
+ const struct nvidia_ghes_grace_reg *regs;
+
+ if (!decoded || decoded->format != NVIDIA_GHES_FORMAT_GRACE || !addr || !val)
+ return -EINVAL;
+ if (index >= decoded->number_regs)
+ return -ERANGE;
+
+ regs = decoded->grace_regs;
+ *addr = le64_to_cpu(regs[index].addr);
+ *val = le64_to_cpu(regs[index].val);
+
+ return 0;
+}
+EXPORT_SYMBOL_IF_KUNIT(nvidia_ghes_grace_reg_pair);
+
+static void nvidia_ghes_print_grace(struct device *dev,
+ const struct nvidia_ghes_decoded *decoded,
+ bool fatal)
+{
+ const char *level = fatal ? KERN_ERR : KERN_INFO;
+ u64 addr, val;
+
+ dev_printk(level, dev, "signature: %s\n", decoded->signature);
+ dev_printk(level, dev, "error_type: %u\n", decoded->error_type);
+ dev_printk(level, dev, "error_instance: %u\n", decoded->error_instance);
+ dev_printk(level, dev, "severity: %u\n", decoded->severity);
+ dev_printk(level, dev, "socket: %u\n", decoded->socket);
+ dev_printk(level, dev, "number_regs: %u\n", decoded->number_regs);
+ dev_printk(level, dev, "instance_base: 0x%016llx\n", decoded->instance_base);
+
+ for (int i = 0; i < decoded->number_regs; i++) {
+ if (nvidia_ghes_grace_reg_pair(decoded, i, &addr, &val))
+ break;
dev_printk(level, dev, "register[%d]: address=0x%016llx value=0x%016llx\n",
- i, le64_to_cpu(nvidia_err->regs[i].addr),
- le64_to_cpu(nvidia_err->regs[i].val));
+ i, addr, val);
+ }
}
static int nvidia_ghes_notify(struct notifier_block *nb,
unsigned long event, void *data)
{
struct acpi_hest_generic_data *gdata = data;
+ struct nvidia_ghes_decoded decoded;
struct nvidia_ghes_private *priv;
- const struct cper_sec_nvidia *nvidia_err;
+ const void *payload;
guid_t sec_guid;
+ u32 len;
+ int ret;
+ bool fatal;
import_guid(&sec_guid, gdata->section_type);
- if (!guid_equal(&sec_guid, &nvidia_sec_guid))
+ if (!guid_equal(&sec_guid, &nvidia_grace_sec_guid))
return NOTIFY_DONE;
priv = container_of(nb, struct nvidia_ghes_private, nb);
-
- if (acpi_hest_get_error_length(gdata) < sizeof(*nvidia_err)) {
- dev_err(priv->dev, "Section too small (%d < %zu)\n",
- acpi_hest_get_error_length(gdata), sizeof(*nvidia_err));
+ len = acpi_hest_get_error_length(gdata);
+ payload = acpi_hest_get_payload(gdata);
+ fatal = event >= GHES_SEV_RECOVERABLE;
+
+ ret = nvidia_ghes_decode_grace(priv->dev, payload, len, &decoded);
+ if (ret) {
+ dev_err(priv->dev,
+ "Malformed NVIDIA CPER section, error_data_length: %u, ret: %d\n",
+ len, ret);
return NOTIFY_OK;
}
- nvidia_err = acpi_hest_get_payload(gdata);
-
- if (event >= GHES_SEV_RECOVERABLE)
- dev_err(priv->dev, "NVIDIA CPER section, error_data_length: %u\n",
- acpi_hest_get_error_length(gdata));
- else
- dev_info(priv->dev, "NVIDIA CPER section, error_data_length: %u\n",
- acpi_hest_get_error_length(gdata));
-
- nvidia_ghes_print_error(priv->dev, nvidia_err, acpi_hest_get_error_length(gdata),
- event >= GHES_SEV_RECOVERABLE);
+ dev_printk(fatal ? KERN_ERR : KERN_INFO, priv->dev,
+ "NVIDIA CPER section, error_data_length: %u\n", len);
+ nvidia_ghes_print_grace(priv->dev, &decoded, fatal);
return NOTIFY_OK;
}
diff --git a/drivers/acpi/apei/ghes-nvidia.h b/drivers/acpi/apei/ghes-nvidia.h
new file mode 100644
index 000000000000..f0592fa41abf
--- /dev/null
+++ b/drivers/acpi/apei/ghes-nvidia.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef GHES_NVIDIA_H
+#define GHES_NVIDIA_H
+
+#include <linux/types.h>
+#include <kunit/visibility.h>
+
+struct device;
+
+enum nvidia_ghes_format {
+ NVIDIA_GHES_FORMAT_UNKNOWN,
+ NVIDIA_GHES_FORMAT_GRACE,
+};
+
+struct nvidia_ghes_grace_reg {
+ __le64 addr;
+ __le64 val;
+};
+
+struct nvidia_ghes_decoded {
+ enum nvidia_ghes_format format;
+ char signature[17];
+ u16 error_type;
+ u16 error_instance;
+ u8 severity;
+ u8 socket;
+ u8 number_regs;
+ u64 instance_base;
+ const struct nvidia_ghes_grace_reg *grace_regs;
+};
+
+VISIBLE_IF_KUNIT int nvidia_ghes_decode_grace(struct device *dev, const void *buf,
+ size_t len,
+ struct nvidia_ghes_decoded *decoded);
+VISIBLE_IF_KUNIT int nvidia_ghes_grace_reg_pair(const struct nvidia_ghes_decoded *decoded,
+ unsigned int index, u64 *addr, u64 *val);
+
+#endif
--
2.50.1 (Apple Git-155)
next parent reply other threads:[~2026-06-12 12:14 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20260612120929.28965-1-kaihengf@nvidia.com>
2026-06-12 12:09 ` Kai-Heng Feng [this message]
2026-06-12 12:09 ` [PATCH v1 2/4] ACPI: APEI: GHES: Add NVIDIA Vera decoder Kai-Heng Feng
2026-06-12 12:09 ` [PATCH v1 3/4] ACPI: APEI: GHES: Add Grace and Vera KUnit coverage Kai-Heng Feng
2026-06-12 13:58 ` Julian Braha
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260612120929.28965-2-kaihengf@nvidia.com \
--to=kaihengf@nvidia.com \
--cc=bp@alien8.de \
--cc=csoto@nvidia.com \
--cc=guohanjun@huawei.com \
--cc=gustavoars@kernel.org \
--cc=kees@kernel.org \
--cc=lenb@kernel.org \
--cc=linux-acpi@vger.kernel.org \
--cc=linux-hardening@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mchehab@kernel.org \
--cc=mochs@nvidia.com \
--cc=rafael@kernel.org \
--cc=shuah@kernel.org \
--cc=tony.luck@intel.com \
--cc=xueshuai@linux.alibaba.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox