Re: [PATCH v12 04/22] gpu: nova-core: mm: Add support to use PRAMIN windows to write to VRAM
From: Alexandre Courbot
Date: Sat May 02 2026 - 11:42:40 EST
On Sun Apr 26, 2026 at 6:14 AM JST, Joel Fernandes wrote:
<snip>
> PRAMIN apertures are a crucial mechanism to direct read/write to VRAM.
> Add support for the same.
>
> Cc: Nikola Djukic <ndjukic@xxxxxxxxxx>
> Signed-off-by: Joel Fernandes <joelagnelf@xxxxxxxxxx>
> ---
> drivers/gpu/nova-core/mm.rs | 5 +
> drivers/gpu/nova-core/mm/pramin.rs | 300 +++++++++++++++++++++++++++++
> drivers/gpu/nova-core/nova_core.rs | 1 +
> drivers/gpu/nova-core/regs.rs | 10 +
> 4 files changed, 316 insertions(+)
> create mode 100644 drivers/gpu/nova-core/mm.rs
> create mode 100644 drivers/gpu/nova-core/mm/pramin.rs
>
> diff --git a/drivers/gpu/nova-core/mm.rs b/drivers/gpu/nova-core/mm.rs
> new file mode 100644
> index 000000000000..7a5dd4220c67
> --- /dev/null
> +++ b/drivers/gpu/nova-core/mm.rs
> @@ -0,0 +1,5 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +//! Memory management subsystems for nova-core.
> +
> +pub(crate) mod pramin;
> diff --git a/drivers/gpu/nova-core/mm/pramin.rs b/drivers/gpu/nova-core/mm/pramin.rs
> new file mode 100644
> index 000000000000..57b560ae1e85
> --- /dev/null
> +++ b/drivers/gpu/nova-core/mm/pramin.rs
> @@ -0,0 +1,300 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +//! Direct VRAM access through the PRAMIN aperture.
> +//!
> +//! PRAMIN provides a 1MB sliding window into VRAM through BAR0, allowing the CPU to access
> +//! video memory directly. Access is managed through a two-level API:
> +//!
> +//! - [`Pramin`]: The parent object that owns the BAR0 reference and synchronization lock.
> +//! - [`PraminWindow`]: A guard object that holds exclusive PRAMIN access for its lifetime.
> +//!
> +//! The PRAMIN aperture is a 1MB region at a fixed offset from BAR0. The window base is
> +//! controlled by an architecture-specific register and is 64KB aligned.
> +//!
> +//! # Examples
> +//!
> +//! ## Basic read/write
> +//!
> +//! ```no_run
> +//! use crate::driver::Bar0;
> +//! use crate::gpu::Chipset;
> +//! use crate::mm::pramin;
> +//! use kernel::device;
> +//! use kernel::devres::Devres;
> +//! use kernel::prelude::*;
> +//! use kernel::sync::Arc;
> +//!
> +//! fn example(
> +//! devres_bar: Arc<Devres<Bar0>>,
> +//! dev: &device::Device<device::Bound>,
> +//! chipset: Chipset,
> +//! vram_region: core::ops::Range<u64>,
> +//! ) -> Result<()> {
> +//! let pramin = Arc::pin_init(
> +//! pramin::Pramin::new(devres_bar, dev, chipset, vram_region)?,
`Pramin::new` takes different arguments (also check the other examples).
> +//! GFP_KERNEL,
> +//! )?;
> +//! let mut window = pramin.get_window(dev)?;
> +//!
> +//! // Write and read back.
> +//! window.try_write32(0x100, 0xDEADBEEF)?;
> +//! let val = window.try_read32(0x100)?;
> +//! assert_eq!(val, 0xDEADBEEF);
> +//!
> +//! Ok(())
> +//! }
> +//! ```
> +//!
> +//! ## Auto-repositioning across VRAM regions
> +//!
> +//! ```no_run
> +//! use crate::driver::Bar0;
> +//! use crate::gpu::Chipset;
> +//! use crate::mm::pramin;
> +//! use kernel::device;
> +//! use kernel::devres::Devres;
> +//! use kernel::prelude::*;
> +//! use kernel::sync::Arc;
> +//!
> +//! fn example(
> +//! devres_bar: Arc<Devres<Bar0>>,
> +//! dev: &device::Device<device::Bound>,
> +//! chipset: Chipset,
> +//! vram_region: core::ops::Range<u64>,
> +//! ) -> Result<()> {
> +//! let pramin = Arc::pin_init(
> +//! pramin::Pramin::new(devres_bar, dev, chipset, vram_region)?,
> +//! GFP_KERNEL,
> +//! )?;
> +//! let mut window = pramin.get_window(dev)?;
> +//!
> +//! // Access first 1MB region.
> +//! window.try_write32(0x100, 0x11111111)?;
> +//!
> +//! // Access at 2MB - window auto-repositions.
> +//! window.try_write32(0x200000, 0x22222222)?;
> +//!
> +//! // Back to first region - window repositions again.
> +//! let val = window.try_read32(0x100)?;
> +//! assert_eq!(val, 0x11111111);
> +//!
> +//! Ok(())
> +//! }
> +//! ```
> +
> +#![expect(unused)]
> +
> +use core::ops::Range;
> +
> +use crate::{
> + bounded_enum,
> + driver::Bar0,
> + num::IntoSafeCast,
> + regs, //
> +};
> +
> +use kernel::{
> + devres::Devres,
> + io::Io,
> + new_mutex,
> + num::Bounded,
> + prelude::*,
> + revocable::RevocableGuard,
> + sizes::{
> + SZ_1M,
> + SZ_64K, //
> + },
> + sync::{
> + lock::mutex::MutexGuard,
> + Arc,
> + Mutex, //
> + },
> +};
> +
> +bounded_enum! {
> + /// Target memory type for the BAR0 window register.
> + ///
> + /// Only VRAM is supported; Hopper+ GPUs do not support other targets.
> + #[derive(Debug)]
> + pub(crate) enum Bar0WindowTarget with TryFrom<Bounded<u32, 2>> {
> + /// Video RAM (GPU framebuffer memory).
> + Vram = 0,
> + }
> +}
> +
> +/// PRAMIN aperture base offset in BAR0.
> +const PRAMIN_BASE: usize = 0x700000;
> +
> +/// PRAMIN aperture size (1MB).
> +const PRAMIN_SIZE: usize = SZ_1M;
> +
> +/// Generate a PRAMIN read accessor.
> +macro_rules! define_pramin_read {
> + ($name:ident, $ty:ty) => {
> + #[doc = concat!("Read a `", stringify!($ty), "` from VRAM at the given offset.")]
> + pub(crate) fn $name(&mut self, vram_offset: usize) -> Result<$ty> {
> + let (bar_offset, new_base) =
> + self.compute_window(vram_offset, ::core::mem::size_of::<$ty>())?;
> +
> + if let Some(base) = new_base {
> + Self::write_window_base(&self.bar, base)?;
> + *self.state = base;
> + }
> + self.bar.$name(bar_offset)
> + }
> + };
> +}
> +
> +/// Generate a PRAMIN write accessor.
> +macro_rules! define_pramin_write {
> + ($name:ident, $ty:ty) => {
> + #[doc = concat!("Write a `", stringify!($ty), "` to VRAM at the given offset.")]
> + pub(crate) fn $name(&mut self, vram_offset: usize, value: $ty) -> Result {
> + let (bar_offset, new_base) =
> + self.compute_window(vram_offset, ::core::mem::size_of::<$ty>())?;
> +
> + if let Some(base) = new_base {
> + Self::write_window_base(&self.bar, base)?;
> + *self.state = base;
> + }
> + self.bar.$name(value, bar_offset)
> + }
> + };
> +}
> +
> +/// PRAMIN aperture manager.
> +///
> +/// Call [`Pramin::get_window()`] to acquire exclusive PRAMIN access.
> +#[pin_data]
> +pub(crate) struct Pramin {
> + bar: Arc<Devres<Bar0>>,
The HRT series [1] will allow you to greatly simplify all this by
storing a `&'a Bar0` directly in this structure. It will most likely
land this cycle, so I think it's a good idea to proactively depend on
them. It should apply cleanly (modulo the Tyr patches IIRC, but you can
skip them if you don't build the driver) on top of `drm-rust-next` - on
top of which this should also be rebased anyway.
[1] https://lore.kernel.org/all/20260427221155.2144848-1-dakr@xxxxxxxxxx/
> + /// Valid VRAM region. Accesses outside this range are rejected.
> + vram_region: Range<u64>,
I don't think that checking the range in this type is particularly
useful, because this abstraction cannot validate VRAM ownership
completely: VRAM can be sparse, reserved, or protected, and with the
current bound access to these invalid areas is not prevented.
PRAMIN should only model the hardware windowing mechanism; address
validity belongs to the higher layers that allocate or obtain those
addresses. IIUC nothing will explode if we try to read or write into
areas that are not VRAM.
> + /// PRAMIN aperture state, protected by a mutex.
> + ///
> + /// # Invariants
> + ///
> + /// This lock is acquired during the DMA fence signaling critical path.
> + /// It must NEVER be held across any reclaimable CPU memory / allocations
> + /// (`GFP_KERNEL`), because the memory reclaim path can call
> + /// `dma_fence_wait()`, which would deadlock with this lock held.
> + #[pin]
> + state: Mutex<u64>,
The problem with this lock is that it is buried, alongside its usage
guide, deep into this type, ensuring that users won't ever see it. It
will create deadlocks rather than preventing them.
And when you look closer at it, you realize it is actually acquired for
any use of `Pramin`, since the only method it exposes is `get_window`,
which acquires the lock, and returns a `PraminWindow` which has exactly
the same layout as `Pramin`, except that the lock is acquired.
So what's the point? Let's just make `get_window` require a `&mut self`
and let the owner of `Pramin` decide how to manage concurrent accesses.
At least the doccomment for the lock will be visible from a higher
layer.
Currently `Pramin` mixes elements of hardware access, memory management,
and synchronization into the same type, squashing what should be
different layers into a single one. Now that the `Io` trait is merged
and available in `drm-rust-next`, it is a good time to implement the
feedback I gave on v8. Roughly:
- `Pramin` should just be the owner and arbiter of the
`NV_PBUS_BAR0_WINDOW` register. Its `get_window(&mut self,
window_base: Bounded<u64, 24>)` method simply returns a window to the
1MB area starting from `base << 16`. It's the simplest possible
abstraction.
- The returned window implements `Io` and `IoKnownSize`. This will allow
users to do all the fancy stuff that comes with `Io`, including
projections.
This API will also be perfect for cases like e.g. setting a memory
page to zero: just give the start of the page to `get_window`, zap the
first 4K of the window, done.
- Random accesses to VRAM, which are needed for walking the page tables
and updating them, can be built on top of this simple API. Either by
adding read/write ops directly to `Pramin`, or by defining another
type that owns a `&mut Pramin` and moves the window automatically if
the next access is out of bounds. This should result in window
management code that is simpler than the current `compute_window`.
That's really all we need. Then `GpuMm` can manage concurrency similarly
to the current model by wrapping `Pramin` inside a `Mutex`, and is also
free to experiment with different locking strategies, something the
current design doesn't allow.
> +}
> +
> +impl Pramin {
> + /// Create a pin-initializer for PRAMIN.
> + ///
> + /// `vram_region` specifies the valid VRAM address range.
> + pub(crate) fn new(
> + bar: Arc<Devres<Bar0>>,
> + vram_region: Range<u64>,
> + ) -> Result<impl PinInit<Self>> {
> + let bar_access = bar.try_access().ok_or(ENODEV)?;
> + let current_base = Self::read_window_base(&bar_access);
> +
> + Ok(pin_init!(Self {
> + bar,
> + vram_region,
> + state <- new_mutex!(current_base, "pramin_state"),
> + }))
> + }
> +
> + /// Acquire exclusive PRAMIN access.
> + ///
> + /// Returns a [`PraminWindow`] guard that provides VRAM read/write accessors.
> + /// The [`PraminWindow`] is exclusive and only one can exist at a time.
> + pub(crate) fn get_window(&self) -> Result<PraminWindow<'_>> {
> + let bar = self.bar.try_access().ok_or(ENODEV)?;
> + let state = self.state.lock();
> + Ok(PraminWindow {
> + bar,
> + vram_region: self.vram_region.clone(),
> + state,
> + })
> + }
> +
> + /// Read the current window base from the BAR0_WINDOW register.
> + fn read_window_base(bar: &Bar0) -> u64 {
> + let reg = bar.read(regs::NV_PBUS_BAR0_WINDOW);
> +
> + // TODO: Convert to Bounded<u64, 40> when available.
It's available. :)
> + u64::from(reg.window_base()) << 16
> + }
I think this method (and `write_window_base`) should be methods of the
`NV_PBUS_BAR0_WINDOW` register (`window_base` and `set_window_base`)
> +}
> +
> +/// PRAMIN window guard for direct VRAM access.
> +///
> +/// This guard holds exclusive access to the PRAMIN aperture. The window auto-repositions
> +/// when accessing VRAM offsets outside the current 1MB range.
> +///
> +/// Only one [`PraminWindow`] can exist at a time per [`Pramin`] instance (enforced by the
> +/// internal `MutexGuard`).
> +pub(crate) struct PraminWindow<'a> {
> + bar: RevocableGuard<'a, Bar0>,
> + vram_region: Range<u64>,
> + state: MutexGuard<'a, u64>,
> +}
> +
> +impl PraminWindow<'_> {
> + /// Write a new window base to the BAR0_WINDOW register.
> + fn write_window_base(bar: &Bar0, base: u64) -> Result {
> + // CAST: After >> 16, a VRAM address fits in u32.
> + let window_base = (base >> 16) as u32;
> + bar.write_reg(
> + regs::NV_PBUS_BAR0_WINDOW::zeroed()
> + .with_target(Bar0WindowTarget::Vram)
> + .try_with_window_base(window_base)?,
> + );
> + Ok(())
> + }
> +
> + /// Compute window parameters for a VRAM access.
> + ///
> + /// Returns (`bar_offset`, `new_base`) where:
> + /// - `bar_offset`: The BAR0 offset to use for the access.
> + /// - `new_base`: `Some(base)` if window needs repositioning, `None` otherwise.
> + fn compute_window(
> + &self,
> + vram_offset: usize,
This should be a `u64`, since it is a VRAM address.
Actually, I noticed that you introduced a `VramAddress` type in a latter
patch. It might be worth introducing it earlier and using it everywhere
a VRAM address is involved, that way we cannot make any mistake - not
critical for now, but feel free to do it if you think it helps.