[PATCH v11 14/22] gpu: nova-core: Hopper/Blackwell: add FSP falcon EMEM operations

From: John Hubbard

Date: Fri May 29 2026 - 23:18:28 EST


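Add external memory (EMEM) read/write operations to the GPU's FSP falcon
engine, along with the NV_PFALCON_FALCON_EMEM_CTL and
NV_PFALCON_FALCON_EMEM_DATA register definitions they rely on. These
operations use Falcon PIO (Programmed I/O) to communicate with the FSP
through indirect memory access: EMEM_CTL selects the byte offset and
auto-increment mode, and each EMEM_DATA access transfers one 32-bit word.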

Signed-off-by: John Hubbard <jhubbard@xxxxxxxxxx>
---
drivers/gpu/nova-core/falcon/fsp.rs | 130 ++++++++++++++++++++++++++--
drivers/gpu/nova-core/regs.rs | 15 ++++
2 files changed, 140 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/nova-core/falcon/fsp.rs b/drivers/gpu/nova-core/falcon/fsp.rs
index 73fb73cb73a5..7067c1963745 100644
--- a/drivers/gpu/nova-core/falcon/fsp.rs
+++ b/drivers/gpu/nova-core/falcon/fsp.rs
@@ -6,12 +6,28 @@
//! The FSP falcon handles secure boot and Chain of Trust operations
//! on Hopper and Blackwell architectures, replacing SEC2's role.

-use kernel::io::register::RegisterBase;
+use kernel::{
+ io::{
+ register::{
+ RegisterBase,
+ WithBase, //
+ },
+ Io, //
+ },
+ num::Bounded,
+ prelude::*,
+ ptr::Alignment, //
+};

-use crate::falcon::{
- FalconEngine,
- PFalcon2Base,
- PFalconBase, //
+use crate::{
+ driver::Bar0,
+ falcon::{
+ Falcon,
+ FalconEngine,
+ PFalcon2Base,
+ PFalconBase, //
+ },
+ regs,
};

/// Type specifying the `Fsp` falcon engine. Cannot be instantiated.
@@ -26,3 +42,107 @@ impl RegisterBase<PFalcon2Base> for Fsp {
}

impl FalconEngine for Fsp {}
+
+/// Maximum addressable EMEM size in bytes, bounded by the 24-bit byte `offset` field
+/// of `NV_PFALCON_FALCON_EMEM_CTL`; its `log2()` sizes the offset range check.
+const EMEM_MAX_SIZE: Alignment = Alignment::new::<{ 1 << 24 }>();
+
+/// I/O backend for the FSP falcon's external memory (EMEM), backed by a borrowed `Bar0`.
+///
+/// `EMEM_CTL` is programmed once per burst with a start byte offset and an
+/// auto-increment mode; each following access to `EMEM_DATA` then advances the
+/// offset by one 32-bit word in hardware.
+struct Emem<'a> {
+ bar: &'a Bar0,
+}
+
+impl<'a> Emem<'a> {
+ fn new(bar: &'a Bar0) -> Self {
+ Self { bar }
+ }
+
+ /// Writes `ctl`, with its offset field set to the start byte `offset`, to `EMEM_CTL`.
+ ///
+ /// Returns `EINVAL`, without writing the register, if `offset` does not fit in 24 bits.
+ fn program(&mut self, offset: usize, ctl: regs::NV_PFALCON_FALCON_EMEM_CTL) -> Result {
+ let offset = Bounded::<usize, { EMEM_MAX_SIZE.log2() }>::try_new(offset)
+ .map(Bounded::cast::<u32>)
+ .ok_or(EINVAL)?;
+
+ self.bar
+ .write(WithBase::of::<Fsp>(), ctl.with_offset(offset));
+
+ Ok(())
+ }
+
+ /// Starts a write burst at byte `offset` (auto-increment on write); `EINVAL` if out of range.
+ fn begin_write(&mut self, offset: usize) -> Result {
+ self.program(
+ offset,
+ regs::NV_PFALCON_FALCON_EMEM_CTL::zeroed().with_auto_increment_write(true),
+ )
+ }
+
+ /// Starts a read burst at byte `offset` (auto-increment on read); `EINVAL` if out of range.
+ fn begin_read(&mut self, offset: usize) -> Result {
+ self.program(
+ offset,
+ regs::NV_PFALCON_FALCON_EMEM_CTL::zeroed().with_auto_increment_read(true),
+ )
+ }
+
+ /// Writes the next 32-bit `value` to `EMEM_DATA`; hardware advances the offset.
+ fn write_next(&mut self, value: u32) {
+ self.bar.write(
+ WithBase::of::<Fsp>(),
+ regs::NV_PFALCON_FALCON_EMEM_DATA::zeroed().with_data(value),
+ );
+ }
+
+ /// Reads the next 32-bit word from `EMEM_DATA`; hardware advances the offset.
+ fn read_next(&mut self) -> u32 {
+ self.bar
+ .read(regs::NV_PFALCON_FALCON_EMEM_DATA::of::<Fsp>())
+ .data()
+ }
+}
+
+impl Falcon<Fsp> {
+ /// Writes `data` to FSP external memory at byte `offset`.
+ ///
+ /// `data` is interpreted as little-endian 32-bit words. Returns `EINVAL` if `offset` or
+ /// `data.len()` is not 4-byte aligned, or if `offset` is outside the EMEM window.
+ #[expect(dead_code)]
+ fn write_emem(&mut self, bar: &Bar0, offset: u32, data: &[u8]) -> Result {
+ if offset % 4 != 0 || data.len() % 4 != 0 {
+ return Err(EINVAL);
+ }
+
+ let mut emem = Emem::new(bar);
+ emem.begin_write(offset as usize)?;
+ for chunk in data.chunks_exact(4) {
+ emem.write_next(u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]));
+ }
+
+ Ok(())
+ }
+
+ /// Reads FSP external memory at byte `offset` into `data`.
+ ///
+ /// `data` is filled with little-endian 32-bit words. Returns `EINVAL` if `offset` or
+ /// `data.len()` is not 4-byte aligned, or if `offset` is outside the EMEM window.
+ #[expect(dead_code)]
+ fn read_emem(&mut self, bar: &Bar0, offset: u32, data: &mut [u8]) -> Result {
+ if offset % 4 != 0 || data.len() % 4 != 0 {
+ return Err(EINVAL);
+ }
+
+ let mut emem = Emem::new(bar);
+ emem.begin_read(offset as usize)?;
+ for chunk in data.chunks_exact_mut(4) {
+ chunk.copy_from_slice(&emem.read_next().to_le_bytes());
+ }
+
+ Ok(())
+ }
+}
diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
index 270779d31ab3..5871bbce7052 100644
--- a/drivers/gpu/nova-core/regs.rs
+++ b/drivers/gpu/nova-core/regs.rs
@@ -467,6 +467,21 @@ pub(crate) fn vga_workspace_addr(self) -> Option<u64> {
pub(crate) NV_PFALCON_FBIF_CTL(u32) @ PFalconBase + 0x00000624 {
7:7 allow_phys_no_ctx => bool;
}
+
+ // Falcon EMEM PIO registers (used by FSP on Hopper/Blackwell).
+ // `EMEM_CTL` sets the offset and mode; `EMEM_DATA` transfers one 32-bit word per access.
+ pub(crate) NV_PFALCON_FALCON_EMEM_CTL(u32) @ PFalconBase + 0x00000ac0 {
+ /// EMEM byte offset (must be 4-byte aligned).
+ 23:0 offset;
+ /// Auto-increment the offset after each `EMEM_DATA` write.
+ 24:24 auto_increment_write => bool;
+ /// Auto-increment the offset after each `EMEM_DATA` read.
+ 25:25 auto_increment_read => bool;
+ }
+
+ pub(crate) NV_PFALCON_FALCON_EMEM_DATA(u32) @ PFalconBase + 0x00000ac4 {
+ 31:0 data => u32; // 32-bit EMEM word at the current `EMEM_CTL` offset.
+ }
}

impl NV_PFALCON_FALCON_DMACTL {
--
2.54.0