[PATCH v3 03/12] gpu: nova-core: gsp: replace BootUnloadGuard with local handlers

From: Alexandre Courbot

Date: Mon Jun 29 2026 - 08:40:43 EST


When adding the GSP unload capability, we introduced `BootUnloadGuard`
to automatically call `Gsp::unload` whenever an error occurred during
the boot process, in order to try to reset the GSP to a valid state.

This approach is not well-suited to the errors that may occur in HALs:
by definition, an error occurring in the HAL means that the GSP is not
booted; yet the first thing that `Gsp::unload` does is queue a shutdown
message to the GSP, which will inevitably result in a timeout when done
from a HAL.

Furthermore, `BootUnloadGuard` is problematic because it holds
additional references to the boot context, notably the `Falcon`s. These
extra references stand in the way of making some of the `Falcon`'s
methods mutable, since those methods would require exclusive access. As
this behavior is only needed in one place, introducing dedicated types
for it is distracting and unnecessary.

Thus, remove `BootUnloadGuard` and adopt a two-level error handling
strategy:

- HALs are free to handle their errors as they see fit (most likely, by
running their unload bundle if it is ready by the time of the error),
- `Gsp::boot` uses a `ScopeGuard` that runs `Gsp::unload`, since the
GSP should be up and running by the time `GspHal::boot` has returned.

Signed-off-by: Alexandre Courbot <acourbot@xxxxxxxxxx>
---
drivers/gpu/nova-core/gsp/boot.rs | 67 +++-------------------------------
drivers/gpu/nova-core/gsp/hal.rs | 13 +++----
drivers/gpu/nova-core/gsp/hal/gh100.rs | 20 ++++------
drivers/gpu/nova-core/gsp/hal/tu102.rs | 23 +++++++-----
4 files changed, 33 insertions(+), 90 deletions(-)

diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs
index bb2000b7a78b..0d2213fb1569 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -30,66 +30,6 @@
},
};

-/// Arguments required to call [`Gsp::unload`](super::Gsp::unload).
-///
-/// Stored as their own type to avoid repeating a long and tedious list in [`BootUnloadGuard`].
-pub(super) struct BootUnloadArgs<'a> {
- gsp: &'a super::Gsp,
- dev: &'a device::Device<device::Bound>,
- bar: Bar0<'a>,
- gsp_falcon: &'a Falcon<Gsp>,
- sec2_falcon: &'a Falcon<Sec2>,
- unload_bundle: Option<super::UnloadBundle>,
-}
-
-/// Guard that calls [`Gsp::unload`](super::Gsp::unload) with a
-/// [`UnloadBundle`](super::UnloadBundle) when dropped.
-///
-/// Used to ensure the `UnloadBundle` is run during failure paths.
-pub(super) struct BootUnloadGuard<'a> {
- guard: ScopeGuard<BootUnloadArgs<'a>, fn(BootUnloadArgs<'a>)>,
-}
-
-impl<'a> BootUnloadGuard<'a> {
- /// Wraps `unload_bundle` into a guard that executes it when dropped.
- pub(super) fn new(
- gsp: &'a super::Gsp,
- dev: &'a device::Device<device::Bound>,
- bar: Bar0<'a>,
- gsp_falcon: &'a Falcon<Gsp>,
- sec2_falcon: &'a Falcon<Sec2>,
- unload_bundle: Option<super::UnloadBundle>,
- ) -> Self {
- Self {
- guard: ScopeGuard::new_with_data(
- BootUnloadArgs {
- gsp,
- dev,
- bar,
- gsp_falcon,
- sec2_falcon,
- unload_bundle,
- },
- |args| {
- let _ = super::Gsp::unload(
- args.gsp,
- args.dev,
- args.bar,
- args.gsp_falcon,
- args.sec2_falcon,
- args.unload_bundle,
- );
- },
- ),
- }
- }
-
- /// Disarms the guard and returns the [`UnloadBundle`](super::UnloadBundle) it contains.
- pub(super) fn dismiss(self) -> Option<super::UnloadBundle> {
- self.guard.dismiss().unload_bundle
- }
-}
-
impl super::Gsp {
/// Attempt to boot the GSP.
///
@@ -107,6 +47,7 @@ pub(crate) fn boot(
let bar = ctx.bar;
let chipset = ctx.chipset;
let gsp_falcon = ctx.gsp_falcon;
+ let sec2_falcon = ctx.sec2_falcon;
let dev = pdev.as_ref();
let hal = super::hal::gsp_hal(chipset);

@@ -118,7 +59,11 @@ pub(crate) fn boot(
let wpr_meta = Coherent::init(dev, GFP_KERNEL, GspFwWprMeta::new(&gsp_fw, &fb_layout))?;

// Perform the chipset-specific boot sequence, and retrieve the unload bundle.
- let unload_guard = hal.boot(&self, &ctx, &fb_layout, &wpr_meta)?;
+ let unload_bundle = hal.boot(&self, &ctx, &fb_layout, &wpr_meta)?;
+
+ let unload_guard = ScopeGuard::new_with_data(unload_bundle, |unload_bundle| {
+ let _ = self.unload(dev, bar, gsp_falcon, sec2_falcon, unload_bundle);
+ });

gsp_falcon.write_os_version(bar, gsp_fw.bootloader.app_version);

diff --git a/drivers/gpu/nova-core/gsp/hal.rs b/drivers/gpu/nova-core/gsp/hal.rs
index 51a277fe97bb..0d65a32f9949 100644
--- a/drivers/gpu/nova-core/gsp/hal.rs
+++ b/drivers/gpu/nova-core/gsp/hal.rs
@@ -24,7 +24,6 @@
Chipset, //
},
gsp::{
- boot::BootUnloadGuard,
Gsp,
GspBootContext,
GspFwWprMeta, //
@@ -51,15 +50,15 @@ fn run(
pub(super) trait GspHal: Send {
/// Performs the GSP boot process, loading and running the required firmwares as needed.
///
- /// Upon success, returns a guard that runs the GSP unload sequence if GSP boot does not
- /// complete.
- fn boot<'a>(
+ /// Upon success, returns the [`crate::gsp::UnloadBundle`] to use with [`Gsp::unload`], if one
+ /// could be created.
+ fn boot(
&self,
- gsp: &'a Gsp,
- ctx: &GspBootContext<'a>,
+ gsp: &Gsp,
+ ctx: &GspBootContext<'_>,
fb_layout: &FbLayout,
wpr_meta: &Coherent<GspFwWprMeta>,
- ) -> Result<BootUnloadGuard<'a>>;
+ ) -> Result<Option<crate::gsp::UnloadBundle>>;

/// Performs HAL-specific post-GSP boot tasks.
///
diff --git a/drivers/gpu/nova-core/gsp/hal/gh100.rs b/drivers/gpu/nova-core/gsp/hal/gh100.rs
index 2187e11168b2..bd15a3067ffe 100644
--- a/drivers/gpu/nova-core/gsp/hal/gh100.rs
+++ b/drivers/gpu/nova-core/gsp/hal/gh100.rs
@@ -23,7 +23,6 @@
Fsp, //
},
gsp::{
- boot::BootUnloadGuard,
hal::{
GspHal,
UnloadBundle, //
@@ -145,27 +144,22 @@ impl GspHal for Gh100 {
///
/// This path uses FSP to establish a chain of trust and boot GSP-FMC. FSP handles
/// the GSP boot internally - no manual GSP reset/boot is needed.
- fn boot<'a>(
+ fn boot(
&self,
- gsp: &'a Gsp,
- ctx: &GspBootContext<'a>,
+ gsp: &Gsp,
+ ctx: &GspBootContext<'_>,
fb_layout: &FbLayout,
wpr_meta: &Coherent<GspFwWprMeta>,
- ) -> Result<BootUnloadGuard<'a>> {
+ ) -> Result<Option<crate::gsp::UnloadBundle>> {
let dev = ctx.dev();
let bar = ctx.bar;
let chipset = ctx.chipset;
let gsp_falcon = ctx.gsp_falcon;
- let sec2_falcon = ctx.sec2_falcon;

let unload_bundle = crate::gsp::UnloadBundle(
KBox::new(FspUnloadBundle, GFP_KERNEL)? as KBox<dyn UnloadBundle>
);

- // Wrap the unload bundle into a drop guard so it is automatically run upon failure.
- let unload_guard =
- BootUnloadGuard::new(gsp, dev, bar, gsp_falcon, sec2_falcon, Some(unload_bundle));
-
let mut fsp = Fsp::wait_secure_boot(dev, bar, chipset)?;

let args = FmcBootArgs::new(
@@ -176,11 +170,13 @@ fn boot<'a>(
false,
)?;

- fsp.boot_fmc(dev, bar, fb_layout, &args)?;
+ // Keep the result as we want to wait for lockdown release even in case of error, to make
+ // sure `args` is not accessed by the GSP anymore.
+ let res = fsp.boot_fmc(dev, bar, fb_layout, &args);

wait_for_gsp_lockdown_release(dev, bar, gsp_falcon, args.boot_params_dma_handle())?;

- Ok(unload_guard)
+ res.map(|()| Some(unload_bundle))
}
}

diff --git a/drivers/gpu/nova-core/gsp/hal/tu102.rs b/drivers/gpu/nova-core/gsp/hal/tu102.rs
index 6ed4ee268086..8511cc647596 100644
--- a/drivers/gpu/nova-core/gsp/hal/tu102.rs
+++ b/drivers/gpu/nova-core/gsp/hal/tu102.rs
@@ -6,7 +6,8 @@
use kernel::{
device,
dma::Coherent,
- io::Io, //
+ io::Io,
+ types::ScopeGuard, //
};

use crate::{
@@ -32,7 +33,6 @@
},
gpu::Chipset,
gsp::{
- boot::BootUnloadGuard,
hal::{
GspHal,
UnloadBundle, //
@@ -264,13 +264,13 @@ fn run_fwsec_frts(
struct Tu102;

impl GspHal for Tu102 {
- fn boot<'a>(
+ fn boot(
&self,
- gsp: &'a Gsp,
- ctx: &GspBootContext<'a>,
+ gsp: &Gsp,
+ ctx: &GspBootContext<'_>,
fb_layout: &FbLayout,
wpr_meta: &Coherent<GspFwWprMeta>,
- ) -> Result<BootUnloadGuard<'a>> {
+ ) -> Result<Option<crate::gsp::UnloadBundle>> {
let dev = ctx.dev();
let bar = ctx.bar;
let chipset = ctx.chipset;
@@ -296,9 +296,12 @@ fn boot<'a>(
.ok()
.map(crate::gsp::UnloadBundle);

- // Wrap the unload bundle into a drop guard so it is automatically run upon failure.
- let unload_guard =
- BootUnloadGuard::new(gsp, dev, bar, gsp_falcon, sec2_falcon, unload_bundle);
+ // Run the unload bundle to try and recover the GSP if an error occurs.
+ let unload_guard = ScopeGuard::new_with_data(unload_bundle, |unload_bundle| {
+ if let Some(unload_bundle) = unload_bundle {
+ let _ = unload_bundle.0.run(dev, bar, gsp_falcon, sec2_falcon);
+ }
+ });

// FWSEC-FRTS is not executed on chips where the FRTS region size is 0 (e.g. GA100).
if !fb_layout.frts.is_empty() {
@@ -329,7 +332,7 @@ fn boot<'a>(
)?
.run(dev, bar, sec2_falcon, wpr_meta)?;

- Ok(unload_guard)
+ Ok(unload_guard.dismiss())
}

fn post_boot(&self, gsp: &Gsp, ctx: &GspBootContext<'_>, gsp_fw: &GspFirmware) -> Result {

--
2.54.0