[PATCH v1 4/4] selftests: firmware: Add NVIDIA GHES EINJ selftest

From: Kai-Heng Feng

Date: Fri Jun 12 2026 - 08:15:26 EST


Exercise the full driver path on real Grace and Vera hardware using
ACPI EINJ to inject CPER sections and validate the kernel log output.
KUnit covers the parser in isolation; this test covers the path from
firmware notification through GUID dispatch to decoded output.

Signed-off-by: Kai-Heng Feng <kaihengf@xxxxxxxxxx>
---
tools/testing/selftests/firmware/Makefile | 4 +-
tools/testing/selftests/firmware/config | 5 +
tools/testing/selftests/firmware/einj_lib.sh | 189 ++++++++++++++++++
.../selftests/firmware/ghes_nvidia_einj.sh | 144 +++++++++++++
.../firmware/ghes_nvidia_einj_profiles.sh | 46 +++++
5 files changed, 386 insertions(+), 2 deletions(-)
create mode 100644 tools/testing/selftests/firmware/einj_lib.sh
create mode 100755 tools/testing/selftests/firmware/ghes_nvidia_einj.sh
create mode 100755 tools/testing/selftests/firmware/ghes_nvidia_einj_profiles.sh

diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile
index 7992969deaa2..b753dd123860 100644
--- a/tools/testing/selftests/firmware/Makefile
+++ b/tools/testing/selftests/firmware/Makefile
@@ -3,8 +3,8 @@
CFLAGS = -Wall \
-O2

-TEST_PROGS := fw_run_tests.sh
-TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_upload.sh fw_lib.sh
+TEST_PROGS := fw_run_tests.sh ghes_nvidia_einj.sh
+TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_upload.sh fw_lib.sh einj_lib.sh ghes_nvidia_einj_profiles.sh
TEST_GEN_FILES := fw_namespace

include ../lib.mk
diff --git a/tools/testing/selftests/firmware/config b/tools/testing/selftests/firmware/config
index 6e402519b117..1b68e638d0b7 100644
--- a/tools/testing/selftests/firmware/config
+++ b/tools/testing/selftests/firmware/config
@@ -4,3 +4,8 @@ CONFIG_FW_LOADER_USER_HELPER=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_FW_UPLOAD=y
+CONFIG_DEBUG_FS=y
+CONFIG_ACPI_APEI=y
+CONFIG_ACPI_APEI_GHES=y
+CONFIG_ACPI_APEI_EINJ=y
+CONFIG_ACPI_APEI_GHES_NVIDIA=y
diff --git a/tools/testing/selftests/firmware/einj_lib.sh b/tools/testing/selftests/firmware/einj_lib.sh
new file mode 100644
index 000000000000..ca569a9fe5b0
--- /dev/null
+++ b/tools/testing/selftests/firmware/einj_lib.sh
@@ -0,0 +1,189 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+EINJ_TABLE=/sys/firmware/acpi/tables/EINJ
+EINJ_DEBUGFS=/sys/kernel/debug/apei/einj
+NVIDIA_PLATFORM_GLOB=/sys/bus/platform/devices/NVDA2012:*
+NVIDIA_DRIVER_DIR=/sys/bus/platform/drivers/nvidia-ghes
+
+# Report why the test cannot run and terminate with the kselftest SKIP code.
+#   $1 - reason, printed to stderr prefixed with the script name
+einj_skip()
+{
+	printf '%s: %s\n' "$0" "$1" >&2
+	exit "$ksft_skip"
+}
+
+einj_require_root()
+{
+ [ "$(id -u)" -eq 0 ] || einj_skip "must be run as root"
+}
+
+# Skip the test unless debugfs is really mounted at /sys/kernel/debug.
+#
+# The directory existing is not sufficient evidence: it can be present as an
+# empty mount point with nothing mounted on it. /proc/mounts is consulted so
+# the skip message matches what was actually checked.
+einj_require_debugfs()
+{
+	[ -d /sys/kernel/debug ] ||
+		einj_skip "debugfs is not mounted at /sys/kernel/debug"
+	# Fields in /proc/mounts are "<source> <mount point> <fstype> ...".
+	grep -qs ' /sys/kernel/debug debugfs ' /proc/mounts ||
+		einj_skip "debugfs is not mounted at /sys/kernel/debug"
+}
+
+# Skip the test unless the platform exposes an ACPI EINJ table and the EINJ
+# debugfs directory is available. If the directory is absent, one best-effort
+# attempt is made to load the "einj" module before giving up.
+einj_require_einj()
+{
+	if [ ! -e "$EINJ_TABLE" ]; then
+		einj_skip "ACPI EINJ table is missing"
+	fi
+
+	# modprobe failure is deliberately ignored; the check below decides.
+	[ -d "$EINJ_DEBUGFS" ] || modprobe einj 2>/dev/null || true
+
+	if [ ! -d "$EINJ_DEBUGFS" ]; then
+		einj_skip "EINJ debugfs directory is missing"
+	fi
+}
+
+# Skip the test unless the EINJ debugfs directory contains both the "vendor"
+# and "vendor_flags" entries.
+einj_require_vendor_einj()
+{
+	if [ ! -e "$EINJ_DEBUGFS/vendor" ]; then
+		einj_skip "NVIDIA vendor EINJ metadata is missing"
+	fi
+	if [ ! -e "$EINJ_DEBUGFS/vendor_flags" ]; then
+		einj_skip "NVIDIA vendor EINJ flags are missing"
+	fi
+}
+
+einj_require_available_error_type()
+{
+ local available
+
+ available=$(einj_read_trimmed_value available_error_type)
+ [ -n "$available" ] || einj_skip "available_error_type is missing"
+}
+
+einj_read_trimmed_value()
+{
+ local file=$1
+
+ einj_read_value "$file" | tr -d '\n'
+}
+
+# Skip the test unless the named EINJ control is writable.
+#   $1 - control file name, relative to $EINJ_DEBUGFS
+einj_require_writable_value()
+{
+	local control=$1
+
+	if ! [ -w "$EINJ_DEBUGFS/$control" ]; then
+		einj_skip "$control is not writable"
+	fi
+}
+
+# Skip the test unless every control that a profile programs is writable.
+# The list below does not include error_inject itself.
+einj_require_writable_profile()
+{
+	local control
+
+	for control in \
+		error_type flags vendor_flags \
+		param1 param2 param3 param4 \
+		notrigger
+	do
+		einj_require_writable_value "$control"
+	done
+}
+
+# Print the first platform device matching $NVIDIA_PLATFORM_GLOB whose
+# "driver" symlink resolves to $NVIDIA_DRIVER_DIR.
+# Returns 0 after printing a match, 1 when no candidate qualifies.
+einj_find_bound_nvidia_device()
+{
+	local candidate
+	local bound_to
+
+	# Unquoted on purpose: the variable holds a glob pattern.
+	for candidate in $NVIDIA_PLATFORM_GLOB; do
+		# An unmatched glob stays literal; skip anything that does not exist.
+		[ -e "$candidate" ] || continue
+
+		# A device without a driver link simply produces an empty string.
+		bound_to=$(readlink -f "$candidate/driver" 2>/dev/null) || true
+		[ "$bound_to" = "$NVIDIA_DRIVER_DIR" ] || continue
+
+		echo "$candidate"
+		return 0
+	done
+
+	return 1
+}
+
+# Print the path of a platform device bound to the nvidia-ghes driver, or skip
+# the test when einj_find_bound_nvidia_device finds none.
+einj_require_bound_nvidia_device()
+{
+	local bound
+
+	if ! bound=$(einj_find_bound_nvidia_device); then
+		einj_skip "no bound NVIDIA GHES device"
+	fi
+	printf '%s\n' "$bound"
+}
+
+einj_read_value()
+{
+ local file=$1
+
+ cat "$EINJ_DEBUGFS/$file"
+}
+
+# Write one newline-terminated value to an EINJ control.
+#   $1 - control file name, relative to $EINJ_DEBUGFS
+#   $2 - value to write
+# Returns non-zero when the file cannot be opened or the write is rejected.
+einj_write_value()
+{
+	local target="$EINJ_DEBUGFS/$1"
+	local payload=$2
+
+	printf '%s\n' "$payload" > "$target"
+}
+
+# Write a previously saved value back to an EINJ control.
+#   $1 - control file name, relative to $EINJ_DEBUGFS
+#   $2 - saved value; an empty string means "nothing to restore"
+# Returns 0 when skipped, otherwise the status of the write.
+einj_restore_value()
+{
+	local control=$1
+	local saved=$2
+
+	# An unset control can read back empty, and there is no way to write
+	# "empty" through the debugfs handler, so leave such controls alone.
+	if [ -z "$saved" ]; then
+		return 0
+	fi
+
+	einj_write_value "$control" "$saved"
+}
+
+# Snapshot the current contents of the EINJ controls into EINJ_SAVED_* globals
+# so einj_restore_state can write them back later.
+#
+# Each line is a plain assignment from a command substitution, so its status
+# is the status of the read; with "set -e" active in the caller, a failed read
+# terminates the script instead of saving a partial snapshot.
+# EINJ_SAVED_ERROR_TYPE doubles as the "a snapshot exists" flag tested by
+# einj_restore_state.
+einj_save_state()
+{
+ EINJ_SAVED_ERROR_TYPE=$(einj_read_value error_type)
+ EINJ_SAVED_FLAGS=$(einj_read_value flags)
+ EINJ_SAVED_PARAM1=$(einj_read_value param1)
+ EINJ_SAVED_PARAM2=$(einj_read_value param2)
+ EINJ_SAVED_PARAM3=$(einj_read_value param3)
+ EINJ_SAVED_PARAM4=$(einj_read_value param4)
+ EINJ_SAVED_VENDOR_FLAGS=$(einj_read_value vendor_flags)
+ EINJ_SAVED_NOTRIGGER=$(einj_read_value notrigger)
+}
+
+# Write the values captured by einj_save_state back to the EINJ controls.
+#
+# Returns 0 when no snapshot exists or every control was restored, 1 when at
+# least one restore failed.
+#
+# This function is invoked as an "if !" condition, a context in which
+# "set -e" is suspended for the whole call. Without explicit checks a failed
+# write would be ignored and only the status of the final restore would reach
+# the caller, so each result is folded into rc. All controls are attempted
+# even after a failure to restore as much state as possible.
+einj_restore_state()
+{
+	local rc=0
+
+	# Nothing was saved (e.g. the script exited before einj_save_state ran).
+	[ -n "${EINJ_SAVED_ERROR_TYPE+x}" ] || return 0
+
+	einj_restore_value error_type "$EINJ_SAVED_ERROR_TYPE" || rc=1
+	einj_restore_value flags "$EINJ_SAVED_FLAGS" || rc=1
+	einj_restore_value param1 "$EINJ_SAVED_PARAM1" || rc=1
+	einj_restore_value param2 "$EINJ_SAVED_PARAM2" || rc=1
+	einj_restore_value param3 "$EINJ_SAVED_PARAM3" || rc=1
+	einj_restore_value param4 "$EINJ_SAVED_PARAM4" || rc=1
+	einj_restore_value vendor_flags "$EINJ_SAVED_VENDOR_FLAGS" || rc=1
+	einj_restore_value notrigger "$EINJ_SAVED_NOTRIGGER" || rc=1
+
+	return "$rc"
+}
+
+einj_emit_kmsg_marker()
+{
+ local tag=$1
+ local marker
+
+ marker="ghes-nvidia-einj:${tag}:$$:${RANDOM}"
+ printf '%s\n' "$marker" > /dev/kmsg
+ printf '%s\n' "$marker"
+}
+
+# Print every dmesg line that follows the first line containing the marker.
+#   $1 - marker string, matched as a literal substring
+# The marker line itself is not printed; nothing is printed if the marker is
+# not present in the dmesg output.
+einj_capture_dmesg_after_marker()
+{
+	dmesg | awk -v marker="$1" '
+		seen
+		!seen && index($0, marker) { seen = 1 }
+	'
+}
+
+einj_wait_for_dmesg_after_marker_contains()
+{
+ local marker=$1
+ local needle=$2
+ local timeout=${3:-10}
+ local i
+ local slice
+
+ for i in $(seq 1 "$timeout"); do
+ slice=$(einj_capture_dmesg_after_marker "$marker")
+ if printf '%s\n' "$slice" | grep -Fq "$needle"; then
+ printf '%s\n' "$slice"
+ return 0
+ fi
+ sleep 1
+ done
+
+ return 1
+}
diff --git a/tools/testing/selftests/firmware/ghes_nvidia_einj.sh b/tools/testing/selftests/firmware/ghes_nvidia_einj.sh
new file mode 100755
index 000000000000..6fc4d3189235
--- /dev/null
+++ b/tools/testing/selftests/firmware/ghes_nvidia_einj.sh
@@ -0,0 +1,144 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+TEST_DIR=$(dirname "$0")
+source "$TEST_DIR/einj_lib.sh"
+source "$TEST_DIR/ghes_nvidia_einj_profiles.sh"
+
+einj_assert_nvidia_cper_output()
+{
+ local profile=$1
+ local output=$2
+
+ if printf '%s\n' "$output" | grep -Fq 'Malformed NVIDIA'; then
+ echo "$0: $profile produced malformed NVIDIA CPER output" >&2
+ printf '%s\n' "$output" >&2
+ return 1
+ fi
+
+ if printf '%s\n' "$output" | grep -Fq 'NVIDIA Grace CPER section'; then
+ if ! printf '%s\n' "$output" | grep -Fq 'signature:'; then
+ echo "$0: $profile Grace output missing signature line" >&2
+ printf '%s\n' "$output" >&2
+ return 1
+ fi
+ if ! printf '%s\n' "$output" | grep -Fq 'error_type:'; then
+ echo "$0: $profile Grace output missing error_type line" >&2
+ printf '%s\n' "$output" >&2
+ return 1
+ fi
+ if ! printf '%s\n' "$output" | grep -Fq 'number_regs:'; then
+ echo "$0: $profile Grace output missing number_regs line" >&2
+ printf '%s\n' "$output" >&2
+ return 1
+ fi
+ if ! printf '%s\n' "$output" | grep -Fq 'instance_base:'; then
+ echo "$0: $profile Grace output missing instance_base line" >&2
+ printf '%s\n' "$output" >&2
+ return 1
+ fi
+ return 0
+ fi
+
+ if printf '%s\n' "$output" | grep -Fq 'NVIDIA Vera CPER section'; then
+ if ! printf '%s\n' "$output" | grep -Fq 'signature:'; then
+ echo "$0: $profile Vera output missing signature line" >&2
+ printf '%s\n' "$output" >&2
+ return 1
+ fi
+ if ! printf '%s\n' "$output" | grep -Fq 'event_type:'; then
+ echo "$0: $profile Vera output missing event_type line" >&2
+ printf '%s\n' "$output" >&2
+ return 1
+ fi
+ if ! printf '%s\n' "$output" | grep -Fq 'event_sub_type:'; then
+ echo "$0: $profile Vera output missing event_sub_type line" >&2
+ printf '%s\n' "$output" >&2
+ return 1
+ fi
+ if ! printf '%s\n' "$output" | grep -Fq 'event_context_count:'; then
+ echo "$0: $profile Vera output missing event_context_count line" >&2
+ printf '%s\n' "$output" >&2
+ return 1
+ fi
+ return 0
+ fi
+
+ echo "$0: $profile did not emit a recognized NVIDIA CPER section" >&2
+ printf '%s\n' "$output" >&2
+ return 1
+}
+
+# Program one injection profile, trigger it and validate the kernel log.
+#   $1 - profile name understood by einj_select_profile
+# Returns 0 when the expected CPER section was logged and passed validation,
+# $ksft_skip when the platform did not accept or did not react to the
+# injection, and 1 for an unknown profile or invalid decoded output.
+einj_run_profile()
+{
+	local profile=$1
+	local marker
+	local output
+
+	if ! einj_select_profile "$profile"; then
+		echo "$0: unknown safe NVIDIA EINJ profile: $profile" >&2
+		return 1
+	fi
+
+	einj_require_writable_profile
+
+	printf '%s: running safe sample %s\n' "$0" "$profile"
+	marker=$(einj_emit_kmsg_marker "$profile")
+
+	# This function runs in a conditional context, where "set -e" is
+	# suspended, so write failures must be checked by hand. The writes are
+	# chained so that error_inject is never triggered unless every parameter
+	# was accepted; otherwise an injection could fire with whatever stale
+	# error_type/params the controls held before the test.
+	if ! {
+		einj_write_value error_type "$EINJ_PROFILE_ERROR_TYPE" &&
+		einj_write_value flags 0 &&
+		einj_write_value vendor_flags "$EINJ_PROFILE_VENDOR_FLAGS" &&
+		einj_write_value param1 "$EINJ_PROFILE_PARAM1" &&
+		einj_write_value param2 "$EINJ_PROFILE_PARAM2" &&
+		einj_write_value param3 "$EINJ_PROFILE_PARAM3" &&
+		einj_write_value param4 "$EINJ_PROFILE_PARAM4" &&
+		einj_write_value notrigger 0 &&
+		einj_write_value error_inject 1
+	}; then
+		echo "$0: $profile: EINJ rejected the injection request" >&2
+		printf '%s: %s not supported on this platform\n' "$0" "$profile"
+		return "$ksft_skip"
+	fi
+
+	# No banner within the timeout is treated as "not this platform".
+	output=$(einj_wait_for_dmesg_after_marker_contains "$marker" "$EINJ_PROFILE_BANNER" 10) || {
+		printf '%s: %s not supported on this platform\n' "$0" "$profile"
+		return "$ksft_skip"
+	}
+
+	einj_assert_nvidia_cper_output "$profile" "$output"
+}
+
+einj_cleanup()
+{
+ local status=$1
+
+ if ! einj_restore_state; then
+ echo "$0: failed to restore EINJ state" >&2
+ [ "$status" -eq 0 ] && status=1
+ fi
+
+ exit "$status"
+}
+
+# Test entry point: verify prerequisites, snapshot the EINJ controls, run every
+# profile and restore the controls on exit.
+# Exits 1 as soon as a profile fails, skips if no profile produced output, and
+# otherwise falls through with success.
+main()
+{
+	local profile
+	local rc
+	local passed=0
+
+	einj_require_root
+	einj_require_debugfs
+	einj_require_einj
+	einj_require_vendor_einj
+	einj_require_available_error_type
+	einj_save_state
+	# Installed only after the snapshot exists so cleanup has state to restore.
+	trap 'einj_cleanup "$?"' EXIT
+
+	# Only the skip-if-missing side effect is wanted here; discard the device
+	# path the helper prints so it does not end up in the test output. A
+	# redirect (not a command substitution) keeps einj_skip in this shell.
+	einj_require_bound_nvidia_device > /dev/null
+
+	for profile in $(einj_list_profiles); do
+		if einj_run_profile "$profile"; then
+			passed=$((passed + 1))
+		else
+			rc=$?
+			# A skipped profile is fine; anything else fails the test.
+			[ "$rc" -eq "$ksft_skip" ] || exit 1
+		fi
+	done
+
+	[ "$passed" -gt 0 ] || einj_skip "no NVIDIA EINJ profiles produced output"
+}
+
+main "$@"
diff --git a/tools/testing/selftests/firmware/ghes_nvidia_einj_profiles.sh b/tools/testing/selftests/firmware/ghes_nvidia_einj_profiles.sh
new file mode 100755
index 000000000000..b25461d2238c
--- /dev/null
+++ b/tools/testing/selftests/firmware/ghes_nvidia_einj_profiles.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+# Run both architecture profiles on every platform; firmware silently ignores
+# selectors it does not support, so a timeout just means "not this platform".
+EINJ_PROFILE_NAMES="cmet_dump_status_grace cmet_dump_status_vera"
+
+# Print the names in $EINJ_PROFILE_NAMES, one per line.
+einj_list_profiles()
+{
+	# Deliberately unquoted: split the space-separated list into words.
+	set -- $EINJ_PROFILE_NAMES
+	printf '%s\n' "$@"
+}
+
+# Load the EINJ parameters for a named profile into the EINJ_PROFILE_* globals.
+#   $1 - profile name
+# Returns 0 on success; returns 1 and leaves the globals untouched when the
+# name is not recognized.
+einj_select_profile()
+{
+	local selector
+	local banner
+
+	# The two profiles differ only in the param1 selector and expected banner.
+	case "$1" in
+	cmet_dump_status_grace)
+		# Grace CMET dump/status: informational sample, selector 3.
+		selector=3
+		banner='NVIDIA Grace CPER section'
+		;;
+	cmet_dump_status_vera)
+		# Vera CMET-NULL dump/status: informational sample, selector 0.
+		selector=0
+		banner='NVIDIA Vera CPER section'
+		;;
+	*)
+		return 1
+		;;
+	esac
+
+	EINJ_PROFILE_ERROR_TYPE=0x80000010
+	EINJ_PROFILE_VENDOR_FLAGS=1
+	EINJ_PROFILE_PARAM1=$selector
+	EINJ_PROFILE_PARAM2=0
+	EINJ_PROFILE_PARAM3=0
+	EINJ_PROFILE_PARAM4=0
+	EINJ_PROFILE_BANNER=$banner
+
+	return 0
+}
--
2.50.1 (Apple Git-155)