Re: [PATCH v11 07/20] gpu: nova-core: mm: Add TLB flush support

From: Joel Fernandes

Date: Thu Apr 16 2026 - 17:23:28 EST


On Wed, Apr 15, 2026 at 05:05:34PM -0400, Joel Fernandes wrote:
> Add TLB (Translation Lookaside Buffer) flush support for GPU MMU.
>
> After modifying page table entries, the GPU's TLB must be invalidated
> to ensure the new mappings take effect. The Tlb struct provides flush
> functionality through BAR0 registers.
>
> The flush operation writes the page directory base address and triggers
> an invalidation, polling for completion with a 2 second timeout matching
> the Nouveau driver.
>
> Cc: Nikola Djukic <ndjukic@xxxxxxxxxx>
> Signed-off-by: Joel Fernandes <joelagnelf@xxxxxxxxxx>
> ---
> drivers/gpu/nova-core/mm.rs | 1 +
> drivers/gpu/nova-core/mm/tlb.rs | 97 +++++++++++++++++++++++++++++++++
> drivers/gpu/nova-core/regs.rs | 44 +++++++++++++++
> 3 files changed, 142 insertions(+)
> create mode 100644 drivers/gpu/nova-core/mm/tlb.rs
>
> diff --git a/drivers/gpu/nova-core/mm.rs b/drivers/gpu/nova-core/mm.rs
> index fa29f525f282..314d660d898b 100644
> --- a/drivers/gpu/nova-core/mm.rs
> +++ b/drivers/gpu/nova-core/mm.rs
> @@ -25,6 +25,7 @@ fn from(pfn: Pfn) -> Self {
> }
>
> pub(crate) mod pramin;
> +pub(super) mod tlb;
>
> use kernel::{
> bitfield,
> diff --git a/drivers/gpu/nova-core/mm/tlb.rs b/drivers/gpu/nova-core/mm/tlb.rs
> new file mode 100644
> index 000000000000..6d384f447635
> --- /dev/null
> +++ b/drivers/gpu/nova-core/mm/tlb.rs
> @@ -0,0 +1,97 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +//! TLB (Translation Lookaside Buffer) flush support for GPU MMU.
> +//!
> +//! After modifying page table entries, the GPU's TLB must be flushed to
> +//! ensure the new mappings take effect. This module provides TLB flush
> +//! functionality for virtual memory managers.
> +//!
> +//! # Examples
> +//!
> +//! ```ignore
> +//! use crate::mm::tlb::Tlb;
> +//!
> +//! fn page_table_update(tlb: &Tlb, pdb_addr: VramAddress) -> Result<()> {
> +//! // ... modify page tables ...
> +//!
> +//! // Flush TLB to make changes visible (polls for completion).
> +//! tlb.flush(pdb_addr)?;
> +//!
> +//! Ok(())
> +//! }
> +//! ```
> +
> +use kernel::{
> + devres::Devres,
> + io::poll::read_poll_timeout,
> + io::Io,
> + new_mutex,
> + prelude::*,
> + sync::{
> + Arc,
> + Mutex, //
> + },
> + time::Delta, //
> +};
> +
> +use crate::{
> + driver::Bar0,
> + mm::VramAddress,
> + regs, //
> +};
> +
> +/// TLB manager for GPU translation buffer operations.
> +#[pin_data]
> +pub(crate) struct Tlb {
> + bar: Arc<Devres<Bar0>>,
> + /// TLB flush serialization lock: This lock is designed to be acquired during
> + /// the DMA fence signalling critical path. It should NEVER be held across any
> + /// reclaimable CPU memory allocations because the memory reclaim path can
> + /// call `dma_fence_wait()` (when implemented), which would deadlock if lock held.
> + #[pin]
> + lock: Mutex<()>,
> +}
> +
> +impl Tlb {
> + /// Create a new TLB manager.
> + pub(super) fn new(bar: Arc<Devres<Bar0>>) -> impl PinInit<Self> {
> + pin_init!(Self {
> + bar,
> + lock <- new_mutex!((), "tlb_flush"),
> + })
> + }
> +
> + /// Flush the GPU TLB for a specific page directory base.
> + ///
> + /// This invalidates all TLB entries associated with the given PDB address.
> + /// Must be called after modifying page table entries to ensure the GPU sees
> + /// the updated mappings.
> + pub(super) fn flush(&self, pdb_addr: VramAddress) -> Result {
> + let _guard = self.lock.lock();
> +
> + let bar = self.bar.try_access().ok_or(ENODEV)?;
> +
> + // Write PDB address.
> + bar.write_reg(regs::NV_TLB_FLUSH_PDB_LO::from_pdb_addr(pdb_addr.raw_u64()));
> + bar.write_reg(regs::NV_TLB_FLUSH_PDB_HI::from_pdb_addr(pdb_addr.raw_u64()));
> +
> + // Trigger flush: invalidate all pages, require global acknowledgment
> + // from all engines before completion.
> + bar.write_reg(
> + regs::NV_TLB_FLUSH_CTRL::zeroed()
> + .with_page_all(true)
> + .with_ack_globally(true)
> + .with_enable(true),
> + );
> +
> + // Poll for completion - enable bit clears when flush is done.
> + read_poll_timeout(
> + || Ok(bar.read(regs::NV_TLB_FLUSH_CTRL)),
> + |ctrl: &regs::NV_TLB_FLUSH_CTRL| !ctrl.enable(),
> + Delta::ZERO,
> + Delta::from_secs(2),
> + )?;
> +
> + Ok(())
> + }
> +}

Btw, I changed this to do it in 2 phases, to avoid holding the RCU read-side
lock across read_poll_timeout(), which can sleep. Will squash it in for v12.

---8<-----------------------

diff --git a/drivers/gpu/nova-core/mm/tlb.rs b/drivers/gpu/nova-core/mm/tlb.rs
index 6d384f447635..3a65db7d9a1a 100644
--- a/drivers/gpu/nova-core/mm/tlb.rs
+++ b/drivers/gpu/nova-core/mm/tlb.rs
@@ -69,24 +69,35 @@ pub(super) fn new(bar: Arc<Devres<Bar0>>) -> impl PinInit<Self> {
pub(super) fn flush(&self, pdb_addr: VramAddress) -> Result {
let _guard = self.lock.lock();

- let bar = self.bar.try_access().ok_or(ENODEV)?;
+ // Broken into 2 phases with scopes (Write and Poll) to avoid holding
+ // RevocableGuard (and hence RCU read-side critical section) across
+ // the read_poll_timeout() call that can sleep.

- // Write PDB address.
- bar.write_reg(regs::NV_TLB_FLUSH_PDB_LO::from_pdb_addr(pdb_addr.raw_u64()));
- bar.write_reg(regs::NV_TLB_FLUSH_PDB_HI::from_pdb_addr(pdb_addr.raw_u64()));
+ // Write phase — hold bar access briefly for register writes only.
+ {
+ let bar = self.bar.try_access().ok_or(ENODEV)?;

- // Trigger flush: invalidate all pages, require global acknowledgment
- // from all engines before completion.
- bar.write_reg(
- regs::NV_TLB_FLUSH_CTRL::zeroed()
- .with_page_all(true)
- .with_ack_globally(true)
- .with_enable(true),
- );
+ // Write PDB address.
+ bar.write_reg(regs::NV_TLB_FLUSH_PDB_LO::from_pdb_addr(pdb_addr.raw_u64()));
+ bar.write_reg(regs::NV_TLB_FLUSH_PDB_HI::from_pdb_addr(pdb_addr.raw_u64()));

- // Poll for completion - enable bit clears when flush is done.
+ // Trigger flush: invalidate all pages, require global acknowledgment
+ // from all engines before completion.
+ bar.write_reg(
+ regs::NV_TLB_FLUSH_CTRL::zeroed()
+ .with_page_all(true)
+ .with_ack_globally(true)
+ .with_enable(true),
+ );
+ }
+
+ // Poll for completion — re-acquire bar access each iteration to avoid
+ // holding the RCU read-side lock (via RevocableGuard) across sleep.
read_poll_timeout(
- || Ok(bar.read(regs::NV_TLB_FLUSH_CTRL)),
+ || {
+ let bar = self.bar.try_access().ok_or(ENODEV)?;
+ Ok(bar.read(regs::NV_TLB_FLUSH_CTRL))
+ },
|ctrl: &regs::NV_TLB_FLUSH_CTRL| !ctrl.enable(),
Delta::ZERO,
Delta::from_secs(2),
--
2.34.1