[PATCH v5 09/38] gpu: nova-core: move GPU init and DMA mask setup into Gpu::new()

From: John Hubbard

Date: Fri Feb 20 2026 - 21:13:53 EST

Move Spec creation, the dev_info log, and DMA mask setup from the
driver's probe() into Gpu::new(), so that all GPU-specific
initialization lives in the Gpu constructor.

This restructures Gpu::new() to use pin_init_scope wrapping
try_pin_init!, which allows running fallible setup code (Spec::new,
dma_set_mask_and_coherent) before the pin-initializer. The parameter
type changes from pci::Device<device::Bound> to pci::Device<device::Core>
because the DMA call requires the Core device state.

Also makes Chipset::arch() const, adds Spec::chipset() accessor, and
makes Spec::new() pub(crate) for use by later patches.

No functional change: the same 47-bit DMA mask is applied.

Cc: Danilo Krummrich <dakr@xxxxxxxxxx>
Cc: Gary Guo <gary@xxxxxxxxxxx>
Signed-off-by: John Hubbard <jhubbard@xxxxxxxxxx>
---
drivers/gpu/nova-core/driver.rs | 15 --------
drivers/gpu/nova-core/gpu.rs | 66 ++++++++++++++++++++++-----------
2 files changed, 44 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
index e887bcc3187f..a26777552710 100644
--- a/drivers/gpu/nova-core/driver.rs
+++ b/drivers/gpu/nova-core/driver.rs
@@ -9,8 +9,6 @@
auxiliary,
device::Core,
devres::Devres,
- dma::Device,
- dma::DmaMask,
pci,
pci::{
Class,
@@ -37,14 +35,6 @@ pub(crate) struct NovaCore {

const BAR0_SIZE: usize = SZ_16M;

-// For now we only support Ampere which can use up to 47-bit DMA addresses.
-//
-// TODO: Add an abstraction for this to support newer GPUs which may support
-// larger DMA addresses. Limiting these GPUs to smaller address widths won't
-// have any adverse affects, unless installed on systems which require larger
-// DMA addresses. These systems should be quite rare.
-const GPU_DMA_BITS: u32 = 47;
-
pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>;

kernel::pci_device_table!(
@@ -83,11 +73,6 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, E
pdev.enable_device_mem()?;
pdev.set_master();

- // SAFETY: No concurrent DMA allocations or mappings can be made because
- // the device is still being probed and therefore isn't being used by
- // other threads of execution.
- unsafe { pdev.dma_set_mask_and_coherent(DmaMask::new::<GPU_DMA_BITS>())? };
-
let bar = Arc::pin_init(
pdev.iomap_region_sized::<BAR0_SIZE>(0, c"nova-core/bar0"),
GFP_KERNEL,
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index b6a898008a59..93bf1c7b3ea1 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -3,6 +3,10 @@
use kernel::{
device,
devres::Devres,
+ dma::{
+ Device,
+ DmaMask, //
+ },
fmt,
pci,
prelude::*,
@@ -102,7 +106,7 @@ fn try_from(value: u32) -> Result<Self, Self::Error> {
});

impl Chipset {
- pub(crate) fn arch(&self) -> Architecture {
+ pub(crate) const fn arch(&self) -> Architecture {
match self {
Self::TU102 | Self::TU104 | Self::TU106 | Self::TU117 | Self::TU116 => {
Architecture::Turing
@@ -155,6 +159,10 @@ pub(crate) enum Architecture {
Blackwell = 0x1b,
}

+// TODO: Set the DMA mask per-architecture. Hopper and Blackwell support 52-bit
+// DMA addresses. For now, use 47-bit which is correct for Turing, Ampere, and Ada.
+const GPU_DMA_BITS: u32 = 47;
+
impl TryFrom<u8> for Architecture {
type Error = Error;

@@ -204,7 +212,7 @@ pub(crate) struct Spec {
}

impl Spec {
- fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
+ pub(crate) fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
// Some brief notes about boot0 and boot42, in chronological order:
//
// NV04 through NV50:
@@ -234,6 +242,10 @@ fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
dev_err!(dev, "Unsupported chipset: {}\n", boot42);
})
}
+
+ pub(crate) fn chipset(&self) -> Chipset {
+ self.chipset
+ }
}

impl TryFrom<regs::NV_PMC_BOOT_42> for Spec {
@@ -278,36 +290,46 @@ pub(crate) struct Gpu {

impl Gpu {
pub(crate) fn new<'a>(
- pdev: &'a pci::Device<device::Bound>,
+ pdev: &'a pci::Device<device::Core>,
devres_bar: Arc<Devres<Bar0>>,
bar: &'a Bar0,
) -> impl PinInit<Self, Error> + 'a {
- try_pin_init!(Self {
- spec: Spec::new(pdev.as_ref(), bar).inspect(|spec| {
- dev_info!(pdev, "NVIDIA ({})\n", spec);
- })?,
+ pin_init::pin_init_scope(move || {
+ let spec = Spec::new(pdev.as_ref(), bar)?;
+ dev_info!(pdev, "NVIDIA ({})\n", spec);
+
+ // SAFETY: No concurrent DMA allocations or mappings can be made because
+ // the device is still being probed and therefore isn't being used by
+ // other threads of execution.
+ unsafe { pdev.dma_set_mask_and_coherent(DmaMask::new::<GPU_DMA_BITS>())? };
+
+ let chipset = spec.chipset();

- // We must wait for GFW_BOOT completion before doing any significant setup on the GPU.
- _: {
- gfw::wait_gfw_boot_completion(bar)
- .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
- },
+ Ok(try_pin_init!(Self {
+ // We must wait for GFW_BOOT completion before doing any significant setup
+ // on the GPU.
+ _: {
+ gfw::wait_gfw_boot_completion(bar)
+ .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
+ },

- sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?,
+ sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, chipset)?,

- gsp_falcon: Falcon::new(
- pdev.as_ref(),
- spec.chipset,
- )
- .inspect(|falcon| falcon.clear_swgen0_intr(bar))?,
+ gsp_falcon: Falcon::new(
+ pdev.as_ref(),
+ chipset,
+ )
+ .inspect(|falcon| falcon.clear_swgen0_intr(bar))?,

- sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset)?,
+ sec2_falcon: Falcon::new(pdev.as_ref(), chipset)?,

- gsp <- Gsp::new(pdev),
+ gsp <- Gsp::new(pdev),

- _: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? },
+ _: { gsp.boot(pdev, bar, chipset, gsp_falcon, sec2_falcon)? },

- bar: devres_bar,
+ bar: devres_bar,
+ spec,
+ }))
})
}

--
2.53.0