From: Kai-Heng Feng <kaihengf@nvidia.com>
To: rafael@kernel.org, shuah@kernel.org, kees@kernel.org
Cc: julianbraha@gmail.com, linux-kernel@vger.kernel.org,
linux-acpi@vger.kernel.org, linux-kselftest@vger.kernel.org,
linux-hardening@vger.kernel.org, csoto@nvidia.com,
mochs@nvidia.com, Kai-Heng Feng <kaihengf@nvidia.com>
Subject: [PATCH v2 4/4] selftests: firmware: Add NVIDIA GHES EINJ selftest
Date: Tue, 16 Jun 2026 11:44:10 +0800 [thread overview]
Message-ID: <20260616034410.70675-5-kaihengf@nvidia.com> (raw)
In-Reply-To: <20260616034410.70675-1-kaihengf@nvidia.com>
Exercise the full driver path on real Grace and Vera hardware using
ACPI EINJ to inject CPER sections and validate the kernel log output.
KUnit covers the parser in isolation; this test covers the path from
firmware notification through GUID dispatch to decoded output.
Signed-off-by: Kai-Heng Feng <kaihengf@nvidia.com>
---
v2:
- No change.
tools/testing/selftests/firmware/Makefile | 4 +-
tools/testing/selftests/firmware/config | 5 +
tools/testing/selftests/firmware/einj_lib.sh | 189 ++++++++++++++++++
.../selftests/firmware/ghes_nvidia_einj.sh | 144 +++++++++++++
.../firmware/ghes_nvidia_einj_profiles.sh | 46 +++++
5 files changed, 386 insertions(+), 2 deletions(-)
create mode 100644 tools/testing/selftests/firmware/einj_lib.sh
create mode 100755 tools/testing/selftests/firmware/ghes_nvidia_einj.sh
create mode 100755 tools/testing/selftests/firmware/ghes_nvidia_einj_profiles.sh
diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile
index 7992969deaa2..b753dd123860 100644
--- a/tools/testing/selftests/firmware/Makefile
+++ b/tools/testing/selftests/firmware/Makefile
@@ -3,8 +3,8 @@
CFLAGS = -Wall \
-O2
-TEST_PROGS := fw_run_tests.sh
-TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_upload.sh fw_lib.sh
+TEST_PROGS := fw_run_tests.sh ghes_nvidia_einj.sh
+TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_upload.sh fw_lib.sh einj_lib.sh ghes_nvidia_einj_profiles.sh
TEST_GEN_FILES := fw_namespace
include ../lib.mk
diff --git a/tools/testing/selftests/firmware/config b/tools/testing/selftests/firmware/config
index 6e402519b117..1b68e638d0b7 100644
--- a/tools/testing/selftests/firmware/config
+++ b/tools/testing/selftests/firmware/config
@@ -4,3 +4,8 @@ CONFIG_FW_LOADER_USER_HELPER=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_FW_UPLOAD=y
+CONFIG_DEBUG_FS=y
+CONFIG_ACPI_APEI=y
+CONFIG_ACPI_APEI_GHES=y
+CONFIG_ACPI_APEI_EINJ=y
+CONFIG_ACPI_APEI_GHES_NVIDIA=y
diff --git a/tools/testing/selftests/firmware/einj_lib.sh b/tools/testing/selftests/firmware/einj_lib.sh
new file mode 100644
index 000000000000..ca569a9fe5b0
--- /dev/null
+++ b/tools/testing/selftests/firmware/einj_lib.sh
@@ -0,0 +1,189 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+EINJ_TABLE=/sys/firmware/acpi/tables/EINJ
+EINJ_DEBUGFS=/sys/kernel/debug/apei/einj
+NVIDIA_PLATFORM_GLOB=/sys/bus/platform/devices/NVDA2012:*
+NVIDIA_DRIVER_DIR=/sys/bus/platform/drivers/nvidia-ghes
+
+# Print the reason the test cannot run to stderr, prefixed with the script
+# name, and terminate the script with the kselftest SKIP status.
+#   $1 - human-readable reason
+einj_skip()
+{
+	local reason=$1
+
+	printf '%s: %s\n' "$0" "$reason" >&2
+	exit "$ksft_skip"
+}
+
+einj_require_root()
+{
+ [ "$(id -u)" -eq 0 ] || einj_skip "must be run as root"
+}
+
+einj_require_debugfs()
+{
+ [ -d /sys/kernel/debug ] || einj_skip "debugfs is not mounted at /sys/kernel/debug"
+}
+
+# Skip the test unless the ACPI EINJ table and the EINJ debugfs directory
+# are both present. If the directory is absent, one attempt is made to load
+# the einj module before giving up.
+einj_require_einj()
+{
+	if [ ! -e "$EINJ_TABLE" ]; then
+		einj_skip "ACPI EINJ table is missing"
+	fi
+
+	# Best effort: a modprobe failure is not an error by itself; the
+	# directory check below decides whether the test can run.
+	[ -d "$EINJ_DEBUGFS" ] || modprobe einj 2>/dev/null || true
+
+	if [ ! -d "$EINJ_DEBUGFS" ]; then
+		einj_skip "EINJ debugfs directory is missing"
+	fi
+}
+
+# Skip the test unless the "vendor" and "vendor_flags" entries exist in the
+# EINJ debugfs directory.
+einj_require_vendor_einj()
+{
+	if [ ! -e "$EINJ_DEBUGFS/vendor" ]; then
+		einj_skip "NVIDIA vendor EINJ metadata is missing"
+	fi
+
+	if [ ! -e "$EINJ_DEBUGFS/vendor_flags" ]; then
+		einj_skip "NVIDIA vendor EINJ flags are missing"
+	fi
+}
+
+# Skip the test when available_error_type reads back empty.
+# Only non-emptiness is checked; the content is not inspected.
+einj_require_available_error_type()
+{
+	local types
+
+	types=$(einj_read_trimmed_value available_error_type)
+	if [ -z "$types" ]; then
+		einj_skip "available_error_type is missing"
+	fi
+}
+
+# Print the content of an EINJ debugfs file with every newline removed,
+# so multi-line files are joined into a single line.
+#   $1 - file name relative to $EINJ_DEBUGFS
+einj_read_trimmed_value()
+{
+	local name=$1
+
+	einj_read_value "$name" |
+		tr -d '\n'
+}
+
+# Skip the test unless the given EINJ debugfs file is writable.
+#   $1 - file name relative to $EINJ_DEBUGFS
+einj_require_writable_value()
+{
+	local name=$1
+
+	if [ ! -w "$EINJ_DEBUGFS/$name" ]; then
+		einj_skip "$name is not writable"
+	fi
+}
+
+# Skip the test unless every control file listed below is writable.
+# The files are checked in the listed order and the first one that is not
+# writable ends the script through einj_skip.
+einj_require_writable_profile()
+{
+	local knob
+
+	for knob in \
+		error_type flags vendor_flags \
+		param1 param2 param3 param4 \
+		notrigger
+	do
+		einj_require_writable_value "$knob"
+	done
+}
+
+# Print the first platform device matching $NVIDIA_PLATFORM_GLOB whose
+# "driver" symlink resolves to $NVIDIA_DRIVER_DIR.
+# Returns 0 after printing a match, 1 when no such device exists.
+einj_find_bound_nvidia_device()
+{
+	local node
+	local bound_driver
+
+	# The glob is intentionally unquoted so the shell expands it.
+	for node in $NVIDIA_PLATFORM_GLOB; do
+		# An unmatched glob stays literal; ignore that non-existent path.
+		[ -e "$node" ] || continue
+
+		# readlink failing (no driver bound) just means "no match".
+		bound_driver=$(readlink -f "$node/driver" 2>/dev/null) || true
+		[ "$bound_driver" = "$NVIDIA_DRIVER_DIR" ] || continue
+
+		echo "$node"
+		return 0
+	done
+
+	return 1
+}
+
+# Skip the test unless a bound NVIDIA GHES platform device exists.
+# On success the device path is printed on stdout.
+einj_require_bound_nvidia_device()
+{
+	local bound
+
+	if ! bound=$(einj_find_bound_nvidia_device); then
+		einj_skip "no bound NVIDIA GHES device"
+	fi
+
+	echo "$bound"
+}
+
+einj_read_value()
+{
+ local file=$1
+
+ cat "$EINJ_DEBUGFS/$file"
+}
+
+# Write a value followed by a newline to an EINJ debugfs file.
+#   $1 - file name relative to $EINJ_DEBUGFS
+#   $2 - value to write
+# The exit status is non-zero when the file cannot be opened or written.
+einj_write_value()
+{
+	local target="$EINJ_DEBUGFS/$1"
+	local payload=$2
+
+	printf '%s\n' "$payload" > "$target"
+}
+
+# Write a previously saved value back to an EINJ debugfs file.
+#   $1 - file name relative to $EINJ_DEBUGFS
+#   $2 - saved value; an empty value is treated as "nothing to restore"
+einj_restore_value()
+{
+	local knob=$1
+	local saved=$2
+
+	# Some EINJ controls read back as an empty string when unset, but the
+	# debugfs write handler has no matching "clear" operation.
+	if [ -z "$saved" ]; then
+		return 0
+	fi
+
+	einj_write_value "$knob" "$saved"
+}
+
+# Snapshot the current content of the EINJ control files into the global
+# EINJ_SAVED_* variables so einj_restore_state can write them back later.
+# EINJ_SAVED_ERROR_TYPE also serves as the "a snapshot exists" flag that
+# einj_restore_state tests for.
+# The exit status is that of the last read (notrigger).
+einj_save_state()
+{
+ EINJ_SAVED_ERROR_TYPE=$(einj_read_value error_type)
+ EINJ_SAVED_FLAGS=$(einj_read_value flags)
+ EINJ_SAVED_PARAM1=$(einj_read_value param1)
+ EINJ_SAVED_PARAM2=$(einj_read_value param2)
+ EINJ_SAVED_PARAM3=$(einj_read_value param3)
+ EINJ_SAVED_PARAM4=$(einj_read_value param4)
+ EINJ_SAVED_VENDOR_FLAGS=$(einj_read_value vendor_flags)
+ EINJ_SAVED_NOTRIGGER=$(einj_read_value notrigger)
+}
+
+# Write the values captured by einj_save_state back to the EINJ control
+# files. Does nothing (status 0) when EINJ_SAVED_ERROR_TYPE was never set,
+# i.e. no snapshot was taken.
+# NOTE(review): the exit status is that of the final einj_restore_value
+# call (notrigger) only. When this function runs in a conditional context
+# such as "if ! einj_restore_state", "set -e" is ignored inside it, so a
+# failure restoring any earlier file does not stop the sequence and is not
+# reflected in the status.
+einj_restore_state()
+{
+ [ -n "${EINJ_SAVED_ERROR_TYPE+x}" ] || return 0
+
+ einj_restore_value error_type "$EINJ_SAVED_ERROR_TYPE"
+ einj_restore_value flags "$EINJ_SAVED_FLAGS"
+ einj_restore_value param1 "$EINJ_SAVED_PARAM1"
+ einj_restore_value param2 "$EINJ_SAVED_PARAM2"
+ einj_restore_value param3 "$EINJ_SAVED_PARAM3"
+ einj_restore_value param4 "$EINJ_SAVED_PARAM4"
+ einj_restore_value vendor_flags "$EINJ_SAVED_VENDOR_FLAGS"
+ einj_restore_value notrigger "$EINJ_SAVED_NOTRIGGER"
+}
+
+einj_emit_kmsg_marker()
+{
+ local tag=$1
+ local marker
+
+ marker="ghes-nvidia-einj:${tag}:$$:${RANDOM}"
+ printf '%s\n' "$marker" > /dev/kmsg
+ printf '%s\n' "$marker"
+}
+
+einj_capture_dmesg_after_marker()
+{
+ local marker=$1
+
+ dmesg | awk -v marker="$marker" '
+ found { print }
+ index($0, marker) { found = 1 }
+ '
+}
+
+einj_wait_for_dmesg_after_marker_contains()
+{
+ local marker=$1
+ local needle=$2
+ local timeout=${3:-10}
+ local i
+ local slice
+
+ for i in $(seq 1 "$timeout"); do
+ slice=$(einj_capture_dmesg_after_marker "$marker")
+ if printf '%s\n' "$slice" | grep -Fq "$needle"; then
+ printf '%s\n' "$slice"
+ return 0
+ fi
+ sleep 1
+ done
+
+ return 1
+}
diff --git a/tools/testing/selftests/firmware/ghes_nvidia_einj.sh b/tools/testing/selftests/firmware/ghes_nvidia_einj.sh
new file mode 100755
index 000000000000..6fc4d3189235
--- /dev/null
+++ b/tools/testing/selftests/firmware/ghes_nvidia_einj.sh
@@ -0,0 +1,144 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+TEST_DIR=$(dirname "$0")
+source "$TEST_DIR/einj_lib.sh"
+source "$TEST_DIR/ghes_nvidia_einj_profiles.sh"
+
+# Validate the kernel log text captured after an injection.
+#   $1 - profile name, used only in diagnostics
+#   $2 - captured log text
+# Fails when the text contains 'Malformed NVIDIA', when neither the Grace
+# nor the Vera section banner is present, or when a field label expected
+# for the detected section is missing. The Grace banner is checked first.
+# On failure a diagnostic and the captured text are written to stderr.
+# Returns 0 on success, 1 on failure.
+einj_assert_nvidia_cper_output()
+{
+	local profile=$1
+	local output=$2
+	local arch
+	local field
+	local -a required
+
+	if grep -Fq 'Malformed NVIDIA' <<<"$output"; then
+		echo "$0: $profile produced malformed NVIDIA CPER output" >&2
+		printf '%s\n' "$output" >&2
+		return 1
+	fi
+
+	# Pick the list of field labels that must accompany the banner.
+	if grep -Fq 'NVIDIA Grace CPER section' <<<"$output"; then
+		arch=Grace
+		required=('signature:' 'error_type:' 'number_regs:' 'instance_base:')
+	elif grep -Fq 'NVIDIA Vera CPER section' <<<"$output"; then
+		arch=Vera
+		required=('signature:' 'event_type:' 'event_sub_type:' 'event_context_count:')
+	else
+		echo "$0: $profile did not emit a recognized NVIDIA CPER section" >&2
+		printf '%s\n' "$output" >&2
+		return 1
+	fi
+
+	# Report the first missing label; the diagnostic names it without
+	# the trailing colon.
+	for field in "${required[@]}"; do
+		grep -Fq "$field" <<<"$output" && continue
+		echo "$0: $profile $arch output missing ${field%:} line" >&2
+		printf '%s\n' "$output" >&2
+		return 1
+	done
+
+	return 0
+}
+
+# Program one NVIDIA EINJ profile, trigger the injection and validate the
+# decoded CPER output that appears in the kernel log.
+#   $1 - profile name understood by einj_select_profile
+# Returns:
+#   0          the expected banner appeared and the output validated
+#   $ksft_skip the banner did not appear within 10 polling attempts
+#   1          unknown profile, a debugfs write failed, or validation failed
+# A non-writable control file ends the whole script through einj_skip.
+einj_run_profile()
+{
+	local profile=$1
+	local marker
+	local output
+
+	if ! einj_select_profile "$profile"; then
+		echo "$0: unknown safe NVIDIA EINJ profile: $profile" >&2
+		return 1
+	fi
+
+	einj_require_writable_profile
+
+	printf '%s: running safe sample %s\n' "$0" "$profile"
+	marker=$(einj_emit_kmsg_marker "$profile") || {
+		echo "$0: $profile: failed to write kernel log marker" >&2
+		return 1
+	}
+
+	# This function may be called from an "&&"/"||" list, where "set -e"
+	# is ignored. Check every write explicitly so a rejected parameter
+	# stops the sequence before error_inject is written, instead of being
+	# misreported as "not supported" after the timeout below.
+	einj_write_value error_type "$EINJ_PROFILE_ERROR_TYPE" &&
+	einj_write_value flags 0 &&
+	einj_write_value vendor_flags "$EINJ_PROFILE_VENDOR_FLAGS" &&
+	einj_write_value param1 "$EINJ_PROFILE_PARAM1" &&
+	einj_write_value param2 "$EINJ_PROFILE_PARAM2" &&
+	einj_write_value param3 "$EINJ_PROFILE_PARAM3" &&
+	einj_write_value param4 "$EINJ_PROFILE_PARAM4" &&
+	einj_write_value notrigger 0 &&
+	einj_write_value error_inject 1 || {
+		echo "$0: $profile: failed to program EINJ injection" >&2
+		return 1
+	}
+
+	output=$(einj_wait_for_dmesg_after_marker_contains "$marker" "$EINJ_PROFILE_BANNER" 10) || {
+		printf '%s: %s not supported on this platform\n' "$0" "$profile"
+		return "$ksft_skip"
+	}
+
+	einj_assert_nvidia_cper_output "$profile" "$output"
+}
+
+# EXIT trap handler: restore the saved EINJ state, then exit.
+#   $1 - exit status the script was about to return
+# The script exits with $1, except that a failed restore turns a status
+# of 0 into 1.
+einj_cleanup()
+{
+	local rc=$1
+
+	if einj_restore_state; then
+		exit "$rc"
+	fi
+
+	echo "$0: failed to restore EINJ state" >&2
+	if [ "$rc" -eq 0 ]; then
+		rc=1
+	fi
+
+	exit "$rc"
+}
+
+# Test entry point: check the prerequisites, save the EINJ state, run
+# every listed profile and skip when none of them produced output.
+# A profile returning $ksft_skip is tolerated; any other failure ends the
+# script with status 1. The EXIT trap restores the saved EINJ state.
+main()
+{
+	local profile
+	local passed=0
+	local rc
+
+	einj_require_root
+	einj_require_debugfs
+	einj_require_einj
+	einj_require_vendor_einj
+	einj_require_available_error_type
+	einj_save_state
+	# Install the trap only after the state has been saved.
+	trap 'einj_cleanup "$?"' EXIT
+
+	einj_require_bound_nvidia_device
+
+	for profile in $(einj_list_profiles); do
+		rc=0
+		einj_run_profile "$profile" || rc=$?
+
+		case "$rc" in
+		0)
+			passed=$((passed + 1))
+			;;
+		"$ksft_skip")
+			;;
+		*)
+			exit 1
+			;;
+		esac
+	done
+
+	if ! [ "$passed" -gt 0 ]; then
+		einj_skip "no NVIDIA EINJ profiles produced output"
+	fi
+}
+
+main "$@"
diff --git a/tools/testing/selftests/firmware/ghes_nvidia_einj_profiles.sh b/tools/testing/selftests/firmware/ghes_nvidia_einj_profiles.sh
new file mode 100755
index 000000000000..b25461d2238c
--- /dev/null
+++ b/tools/testing/selftests/firmware/ghes_nvidia_einj_profiles.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+# Run both architecture profiles on every platform; firmware silently ignores
+# selectors it does not support, so a timeout just means "not this platform".
+EINJ_PROFILE_NAMES="cmet_dump_status_grace cmet_dump_status_vera"
+
+# Print the profile names from $EINJ_PROFILE_NAMES, one per line.
+# The variable is deliberately unquoted: word splitting turns the
+# space-separated list into separate printf arguments, and printf repeats
+# its format for each argument.
+einj_list_profiles()
+{
+ printf '%s\n' $EINJ_PROFILE_NAMES
+}
+
+# Load the EINJ parameters of a named profile into the global
+# EINJ_PROFILE_* variables.
+#   $1 - profile name
+# Returns 0 when the profile is known. Returns 1 for an unknown name, in
+# which case none of the EINJ_PROFILE_* variables are modified.
+einj_select_profile()
+{
+	local profile=$1
+	local selector
+	local banner
+
+	# The known profiles differ only in param1 and the expected banner.
+	case "$profile" in
+	cmet_dump_status_grace)
+		# Grace CMET dump/status: informational sample, selector 3.
+		selector=3
+		banner='NVIDIA Grace CPER section'
+		;;
+	cmet_dump_status_vera)
+		# Vera CMET-NULL dump/status: informational sample, selector 0.
+		selector=0
+		banner='NVIDIA Vera CPER section'
+		;;
+	*)
+		return 1
+		;;
+	esac
+
+	EINJ_PROFILE_ERROR_TYPE=0x80000010
+	EINJ_PROFILE_VENDOR_FLAGS=1
+	EINJ_PROFILE_PARAM1=$selector
+	EINJ_PROFILE_PARAM2=0
+	EINJ_PROFILE_PARAM3=0
+	EINJ_PROFILE_PARAM4=0
+	EINJ_PROFILE_BANNER=$banner
+
+	return 0
+}
--
2.50.1 (Apple Git-155)
prev parent reply other threads:[~2026-06-16 3:44 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-16 3:44 [PATCH v2 0/4] ACPI: APEI: GHES: Add NVIDIA Vera CPER decoder and tests Kai-Heng Feng
2026-06-16 3:44 ` [PATCH v2 1/4] ACPI: APEI: GHES: Refactor Grace decoder helpers Kai-Heng Feng
2026-06-16 3:44 ` [PATCH v2 2/4] ACPI: APEI: GHES: Add NVIDIA Vera decoder Kai-Heng Feng
2026-06-16 3:44 ` [PATCH v2 3/4] ACPI: APEI: GHES: Add Grace and Vera KUnit coverage Kai-Heng Feng
2026-06-16 3:44 ` Kai-Heng Feng [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260616034410.70675-5-kaihengf@nvidia.com \
--to=kaihengf@nvidia.com \
--cc=csoto@nvidia.com \
--cc=julianbraha@gmail.com \
--cc=kees@kernel.org \
--cc=linux-acpi@vger.kernel.org \
--cc=linux-hardening@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=mochs@nvidia.com \
--cc=rafael@kernel.org \
--cc=shuah@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox