Re: [PATCH v10 17/28] gpu: nova-core: Hopper/Blackwell: add FSP secure boot completion waiting
From: Alexandre Courbot
Date: Fri Apr 17 2026 - 10:29:42 EST
On Sat Apr 11, 2026 at 11:49 AM JST, John Hubbard wrote:
> Add the FSP module with Fsp::wait_secure_boot(), which polls the I2CS
> thermal scratch register until FSP signals boot success or the 5-second
> timeout expires. Hopper and Blackwell use FSP instead of SEC2 for
> secure boot.
>
> Signed-off-by: John Hubbard <jhubbard@xxxxxxxxxx>
> ---
> drivers/gpu/nova-core/fsp.rs | 53 ++++++++++++++++++++++++++++++
> drivers/gpu/nova-core/gsp/boot.rs | 5 ++-
> drivers/gpu/nova-core/nova_core.rs | 1 +
> drivers/gpu/nova-core/regs.rs | 29 ++++++++++++++++
> 4 files changed, 87 insertions(+), 1 deletion(-)
> create mode 100644 drivers/gpu/nova-core/fsp.rs
>
> diff --git a/drivers/gpu/nova-core/fsp.rs b/drivers/gpu/nova-core/fsp.rs
> new file mode 100644
> index 000000000000..55e543e80de8
> --- /dev/null
> +++ b/drivers/gpu/nova-core/fsp.rs
> @@ -0,0 +1,53 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +//! FSP (Firmware System Processor) interface for Hopper/Blackwell GPUs.
> +//!
> +//! Hopper/Blackwell use a simplified firmware boot sequence: FMC --> FSP --> GSP.
> +//! Unlike Turing/Ampere/Ada, there is NO SEC2 (Security Engine 2) usage.
> +//! FSP handles secure boot directly using FMC firmware + Chain of Trust.
> +
> +use kernel::{
> + device,
> + io::poll::read_poll_timeout,
> + prelude::*,
> + time::Delta, //
> +};
> +
> +use crate::regs;
> +
> +/// FSP secure boot completion timeout in milliseconds.
> +const FSP_SECURE_BOOT_TIMEOUT_MS: i64 = 5000;
> +
> +/// FSP interface for Hopper/Blackwell GPUs.
> +pub(crate) struct Fsp;
> +
> +impl Fsp {
> + /// Wait for FSP secure boot completion.
> + ///
> + /// Polls the thermal scratch register until FSP signals boot completion
> + /// or timeout occurs.
> + pub(crate) fn wait_secure_boot(
> + dev: &device::Device<device::Bound>,
> + bar: &crate::driver::Bar0,
> + arch: crate::gpu::Architecture,
> + ) -> Result {
> + debug_assert!(
> + regs::read_fsp_boot_complete_status(bar, arch).is_some(),
> + "wait_secure_boot called on non-FSP architecture"
> + );
> +
> + let timeout = Delta::from_millis(FSP_SECURE_BOOT_TIMEOUT_MS);
Let's inline this `timeout` at its only use site below, as is already done for
the polling interval (`Delta::from_millis(10)`).
> +
> + read_poll_timeout(
> + || regs::read_fsp_boot_complete_status(bar, arch).ok_or(ENOTSUPP),
> + |&status| status == regs::FSP_BOOT_COMPLETE_SUCCESS,
> + Delta::from_millis(10),
> + timeout,
> + )
> + .map_err(|_| {
> + dev_err!(dev, "FSP secure boot completion timeout\n");
> + ETIMEDOUT
> + })
> + .map(|_| ())
> + }
> +}
> diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs
> index 1998bd230185..9609cef3ff51 100644
> --- a/drivers/gpu/nova-core/gsp/boot.rs
> +++ b/drivers/gpu/nova-core/gsp/boot.rs
> @@ -33,6 +33,7 @@
> gsp::GspFirmware,
> FIRMWARE_VERSION, //
> },
> + fsp::Fsp,
> gpu::{
> Architecture,
> Chipset, //
> @@ -199,7 +200,7 @@ fn boot_via_sec2(
> /// the GSP boot internally - no manual GSP reset/boot is needed.
> fn boot_via_fsp(
> dev: &device::Device<device::Bound>,
> - _bar: &Bar0,
> + bar: &Bar0,
> chipset: Chipset,
> _gsp_falcon: &Falcon<Gsp>,
> _wpr_meta: &Coherent<GspFwWprMeta>,
> @@ -209,6 +210,8 @@ fn boot_via_fsp(
>
> let _fsp_fw = FspFirmware::new(dev, chipset, FIRMWARE_VERSION)?;
>
> + Fsp::wait_secure_boot(dev, bar, chipset.arch())?;
> +
> Err(ENOTSUPP)
> }
>
> diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs
> index 3a609f6937e4..53558ac0f619 100644
> --- a/drivers/gpu/nova-core/nova_core.rs
> +++ b/drivers/gpu/nova-core/nova_core.rs
> @@ -17,6 +17,7 @@
> mod falcon;
> mod fb;
> mod firmware;
> +mod fsp;
> mod gpu;
> mod gsp;
> #[macro_use]
> diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
> index 6faeed73901d..e4de7bfffde1 100644
> --- a/drivers/gpu/nova-core/regs.rs
> +++ b/drivers/gpu/nova-core/regs.rs
> @@ -511,6 +511,35 @@ pub(crate) fn mem_scrubbing_done(self) -> bool {
> }
> }
>
> +// PTHERM registers
> +
> +// FSP secure boot completion status register used by FSP to signal boot completion.
> +// This is the NV_THERM_I2CS_SCRATCH register.
> +// Different architectures use different addresses:
> +// - Hopper (GH100) and Blackwell GB10x: 0x000200bc
> +// - Blackwell GB20x: 0x00ad00bc
> +pub(crate) fn fsp_thermal_scratch_reg_addr(arch: Architecture) -> Result<usize> {
> + match arch {
> + Architecture::Hopper | Architecture::BlackwellGB10x => Ok(0x000200bc),
> + Architecture::BlackwellGB20x => Ok(0x00ad00bc),
> + _ => Err(kernel::error::code::ENOTSUPP),
> + }
> +}
This function shouldn't be public, as it is only used by the function
right below. More importantly, it is the wrong way to handle the
architectural differences, since it bypasses the register mechanism
entirely.
The two NV_THERM_I2CS_SCRATCH registers should be defined in `gb100` and
`gb202` sub-modules respectively, similarly to how
`NV_FUSE_STATUS_OPT_DISPLAY` is defined. According to OpenRM they should
both have a 32-bit `fsp_boot_complete` field.
Then, `read_fsp_boot_complete_status` should rely on an FSP HAL to fetch the
correct register (again, similarly to how `NV_FUSE_STATUS_OPT_DISPLAY`
is accessed in the `fb` HAL).