Re: [PATCH v7 05/31] gpu: nova-core: set DMA mask width based on GPU architecture

From: Alexandre Courbot

Date: Mon Mar 23 2026 - 09:11:12 EST

On Wed Mar 18, 2026 at 7:53 AM JST, John Hubbard wrote:
> Replace the hardcoded 47-bit DMA mask with per-architecture values.
> Hopper and Blackwell support 52-bit DMA addresses, while Turing,
> Ampere, and Ada use 47-bit.
>
> Add Architecture::dma_mask() as a const method with an exhaustive
> match, so new architectures get a compile-time reminder to specify
> their DMA mask width. Move Spec creation into probe() so the
> architecture is known before setting the DMA mask, and pass the Spec
> into Gpu::new().
>
> Cc: Danilo Krummrich <dakr@xxxxxxxxxx>
> Cc: Gary Guo <gary@xxxxxxxxxxx>

Why the Ccs? Some patches in the series seem to have random people Cc'd
to them.

> Signed-off-by: John Hubbard <jhubbard@xxxxxxxxxx>
> ---
> drivers/gpu/nova-core/driver.rs | 28 +++++++--------
> drivers/gpu/nova-core/gpu.rs | 60 +++++++++++++++++++--------------
> 2 files changed, 47 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
> index 84b0e1703150..41227d29934e 100644
> --- a/drivers/gpu/nova-core/driver.rs
> +++ b/drivers/gpu/nova-core/driver.rs
> @@ -5,7 +5,6 @@
> device::Core,
> devres::Devres,
> dma::Device,
> - dma::DmaMask,
> pci,
> pci::{
> Class,
> @@ -23,7 +22,10 @@
> },
> };
>
> -use crate::gpu::Gpu;
> +use crate::gpu::{
> + Gpu,
> + Spec, //
> +};
>
> /// Counter for generating unique auxiliary device IDs.
> static AUXILIARY_ID_COUNTER: Atomic<u32> = Atomic::new(0);
> @@ -38,14 +40,6 @@ pub(crate) struct NovaCore {
>
> const BAR0_SIZE: usize = SZ_16M;
>
> -// For now we only support Ampere which can use up to 47-bit DMA addresses.
> -//
> -// TODO: Add an abstraction for this to support newer GPUs which may support
> -// larger DMA addresses. Limiting these GPUs to smaller address widths won't
> -// have any adverse affects, unless installed on systems which require larger
> -// DMA addresses. These systems should be quite rare.
> -const GPU_DMA_BITS: u32 = 47;
> -
> pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>;
>
> kernel::pci_device_table!(
> @@ -84,18 +78,20 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, E
> pdev.enable_device_mem()?;
> pdev.set_master();
>
> - // SAFETY: No concurrent DMA allocations or mappings can be made because
> - // the device is still being probed and therefore isn't being used by
> - // other threads of execution.
> - unsafe { pdev.dma_set_mask_and_coherent(DmaMask::new::<GPU_DMA_BITS>())? };
> -
> let bar = Arc::pin_init(
> pdev.iomap_region_sized::<BAR0_SIZE>(0, c"nova-core/bar0"),
> GFP_KERNEL,
> )?;
> + let spec = Spec::new(pdev.as_ref(), bar.access(pdev.as_ref())?)?;
> + dev_info!(pdev, "NVIDIA ({})\n", spec);
> +
> + // SAFETY: No concurrent DMA allocations or mappings can be made because
> + // the device is still being probed and therefore isn't being used by
> + // other threads of execution.
> + unsafe { pdev.dma_set_mask_and_coherent(spec.chipset().arch().dma_mask())? };
>
> Ok(try_pin_init!(Self {
> - gpu <- Gpu::new(pdev, bar.clone(), bar.access(pdev.as_ref())?),
> + gpu <- Gpu::new(pdev, bar.clone(), bar.access(pdev.as_ref())?, spec),
> _reg <- auxiliary::Registration::new(
> pdev.as_ref(),
> c"nova-drm",
> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
> index 8f317d213908..9e140463603b 100644
> --- a/drivers/gpu/nova-core/gpu.rs
> +++ b/drivers/gpu/nova-core/gpu.rs
> @@ -3,6 +3,7 @@
> use kernel::{
> device,
> devres::Devres,
> + dma::DmaMask,
> fmt,
> pci,
> prelude::*,
> @@ -162,6 +163,19 @@ pub(crate) enum Architecture {
> Blackwell = 0x1b,
> }
>
> +impl Architecture {
> + /// Returns the DMA mask supported by this architecture.
> + ///
> + /// Hopper and Blackwell support 52-bit DMA addresses, while earlier
> + /// architectures (Turing, Ampere, Ada) support 47-bit.

This last sentence is unneeded, we describe what methods provide in
doccomments, not how they do it or what the result will be.

> + pub(crate) const fn dma_mask(&self) -> DmaMask {
> + match self {
> + Self::Turing | Self::Ampere | Self::Ada => DmaMask::new::<47>(),
> + Self::Hopper | Self::Blackwell => DmaMask::new::<52>(),
> + }
> + }
> +}

I see you introduce a `Gpu` HAL in the next patch. I think this should
also be part of the HAL - there is no benefit in having this method
const since the architecture is probed at runtime anyway.

> +
> impl TryFrom<u8> for Architecture {
> type Error = Error;
>
> @@ -211,7 +225,7 @@ pub(crate) struct Spec {
> }
>
> impl Spec {
> - fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
> + pub(crate) fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
> // Some brief notes about boot0 and boot42, in chronological order:
> //
> // NV04 through NV50:
> @@ -292,38 +306,34 @@ pub(crate) fn new<'a>(
> pdev: &'a pci::Device<device::Bound>,
> devres_bar: Arc<Devres<Bar0>>,
> bar: &'a Bar0,
> + spec: Spec,
> ) -> impl PinInit<Self, Error> + 'a {
> - pin_init::pin_init_scope(move || {
> - let spec = Spec::new(pdev.as_ref(), bar)?;
> - dev_info!(pdev, "NVIDIA ({})\n", spec);
> -
> - let chipset = spec.chipset();
> + let chipset = spec.chipset();
>
> - Ok(try_pin_init!(Self {
> - // We must wait for GFW_BOOT completion before doing any significant setup
> - // on the GPU.
> - _: {
> - gfw::wait_gfw_boot_completion(bar)
> - .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
> - },
> + try_pin_init!(Self {

What, and now we undo what we just did in patch 4? 0_o; What was that
all for?

I did a `git diff` between this step in the series and the state two
steps above, and it seems to confirm my intuition on patch 4: you just
need a few more `Copy` implementations. They can be added in this patch,
and patch 4 dropped altogether.