Re: [PATCH v2 5/5] gpu: nova-core: run Booter Unloader and FWSEC-SB upon unbinding

From: Alexandre Courbot

Date: Wed Apr 22 2026 - 06:47:03 EST


On Wed Apr 22, 2026 at 3:01 PM JST, Eliot Courtney wrote:
> On Tue Apr 21, 2026 at 3:16 PM JST, Alexandre Courbot wrote:
>> When probing the driver, the FWSEC-FRTS firmware creates a WPR2 secure
>> memory region to store the GSP firmware, and the Booter Loader loads and
>> starts that firmware into the GSP, making it run in RISC-V mode.
>>
>> These operations need to be reverted upon unloading, particularly the
>> WPR2 secure region creation, as its presence prevents the driver from
>> subsequently probing.
>>
>> Thus, load and run the Booter Unloader and FWSEC-SB firmwares at unbind
>> time to put the GPU into a state where it can be probed again.
>>
>> Signed-off-by: Alexandre Courbot <acourbot@xxxxxxxxxx>
>> ---
>> drivers/gpu/nova-core/firmware/booter.rs | 1 -
>> drivers/gpu/nova-core/firmware/fwsec.rs | 1 -
>> drivers/gpu/nova-core/gpu.rs | 8 +++++-
>> drivers/gpu/nova-core/gsp/boot.rs | 43 ++++++++++++++++++++++++++++++++
>> drivers/gpu/nova-core/regs.rs | 5 ++++
>> 5 files changed, 55 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/nova-core/firmware/booter.rs b/drivers/gpu/nova-core/firmware/booter.rs
>> index de2a4536b532..771b018ba580 100644
>> --- a/drivers/gpu/nova-core/firmware/booter.rs
>> +++ b/drivers/gpu/nova-core/firmware/booter.rs
>> @@ -280,7 +280,6 @@ fn new_booter(data: &[u8]) -> Result<Self> {
>> #[derive(Copy, Clone, Debug, PartialEq)]
>> pub(crate) enum BooterKind {
>> Loader,
>> - #[expect(unused)]
>> Unloader,
>> }
>>
>> diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs
>> index 8810cb49db67..4108f28cd338 100644
>> --- a/drivers/gpu/nova-core/firmware/fwsec.rs
>> +++ b/drivers/gpu/nova-core/firmware/fwsec.rs
>> @@ -144,7 +144,6 @@ pub(crate) enum FwsecCommand {
>> /// image into it.
>> Frts { frts_addr: u64, frts_size: u64 },
>> /// Asks [`FwsecFirmware`] to load pre-OS apps on the PMU.
>> - #[expect(dead_code)]
>> Sb,
>> }
>>
>> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
>> index 8f2ae9e8a519..37d0e4587ed3 100644
>> --- a/drivers/gpu/nova-core/gpu.rs
>> +++ b/drivers/gpu/nova-core/gpu.rs
>> @@ -286,7 +286,13 @@ pub(crate) fn unbind(&self, dev: &device::Device<device::Core>) {
>> return;
>> };
>>
>> - let _ = kernel::warn_on_err!(self.gsp.unload(dev, bar, &self.gsp_falcon));
>> + let _ = kernel::warn_on_err!(self.gsp.unload(
>> + dev,
>> + bar,
>> + self.spec.chipset,
>> + &self.gsp_falcon,
>> + &self.sec2_falcon,
>> + ));
>>
>> self.sysmem_flush.unregister(bar);
>> }
>> diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs
>> index 3f4e99b2497b..e00cfebe5d11 100644
>> --- a/drivers/gpu/nova-core/gsp/boot.rs
>> +++ b/drivers/gpu/nova-core/gsp/boot.rs
>> @@ -267,7 +267,9 @@ pub(crate) fn unload(
>> &self,
>> dev: &device::Device<device::Bound>,
>> bar: &Bar0,
>> + chipset: Chipset,
>> gsp_falcon: &Falcon<Gsp>,
>> + sec2_falcon: &Falcon<Sec2>,
>> ) -> Result {
>> // Shut down the GSP.
>>
>> @@ -275,6 +277,47 @@ pub(crate) fn unload(
>> .inspect_err(|e| dev_err!(dev, "unload guest driver failed: {:?}", e))?;
>> dev_dbg!(dev, "GSP shut down\n");
>>
>> + // Run FWSEC-SB to reset the GSP falcon to its pre-libos state.
>> +
>> + let bios = Vbios::new(dev, bar)?;
>> + let fwsec_sb = FwsecFirmware::new(dev, gsp_falcon, bar, &bios, FwsecCommand::Sb)?;
>> +
>> + if chipset.needs_fwsec_bootloader() {
>> + let fwsec_sb_bl = FwsecFirmwareWithBl::new(fwsec_sb, dev, chipset)?;
>> + // Load and run the bootloader, which will load FWSEC-SB and run it.
>> + fwsec_sb_bl.run(dev, gsp_falcon, bar)?;
>> + } else {
>> + // Load and run FWSEC-SB directly.
>> + fwsec_sb.run(dev, gsp_falcon, bar)?;
>> + }
>> + dev_dbg!(dev, "FWSEC SB completed\n");
>> +
>> + // Remove WPR2 region if set.
>> +
>> + let wpr2_hi = bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI);
>> + if wpr2_hi.is_wpr2_set() {
>> + let booter_unloader = BooterFirmware::new(
>> + dev,
>> + BooterKind::Unloader,
>> + chipset,
>> + FIRMWARE_VERSION,
>> + sec2_falcon,
>> + bar,
>> + )?;
>> +
>> + sec2_falcon.reset(bar)?;
>> + sec2_falcon.load(dev, bar, &booter_unloader)?;
>> + let _ = sec2_falcon.boot(bar, Some(0xff), Some(0xff))?;
>
> What about a named constant if you can think of a good name for 0xff or
> a comment explaining why we need to write 0xff into the two mailboxes?
> Presumably we don't care about the return value here since we check
> success using the register read below.

OpenRM also uses `0xff` directly. These appear to be sentinel values:
Booter Unloader is supposed to return its exit status in `mbox0`, so if
the value of `mbox0` has changed from the sentinel, Booter Unloader has
indeed run.

Let me add a constant to actually carry that intent through its name,
and also add the missing check that the `mbox0` value has changed upon
return.

Also, the sentinel value is only useful for `mbox0`, so let's skip it
for `mbox1`.

>
> Thanks for working on this, this will be a great help for avoiding
> reboots during development (since pcie reset sometimes has issues).

Yes, and this only happens on Blackwell, which you have kindly covered!
:) Which is the whole reason for sending this patchset now, so thanks to
you!