[PATCH v3] gpu: nova-core: fix stack overflow in GSP memory allocation
From: Tim Kovalenko via B4 Relay
Date: Tue Feb 17 2026 - 23:02:18 EST
From: Tim Kovalenko <tim.kovalenko@xxxxxxxxx>
The `Cmdq::new` function was allocating a `PteArray` struct on the stack,
causing a stack overflow: the frame size reached 8216 bytes.
Modify `PteArray` to calculate and write the Page Table Entries directly
into the coherent DMA buffer one entry at a time. This keeps the array
off the stack entirely and brings the frame size back within limits.
Signed-off-by: Tim Kovalenko <tim.kovalenko@xxxxxxxxx>
---
Changes in v3:
- Addressed the review comments and reinstated the PteArray type.
- PteArray now uses `init` instead of `new`; `init` writes to `self`
page by page instead of building the array on the stack.
- PteArray only needs a pte pointer obtained from `gsp_mem.as_slice_mut`.
I hope I understood everything in the V2 email chain and implemented it correctly :)
- Link to v2: https://lore.kernel.org/r/20260213-drm-rust-next-v2-1-aa094f78721a@xxxxxxxxx
Changes in v2:
- Missed a code formatting issue.
- Link to v1: https://lore.kernel.org/r/20260212-drm-rust-next-v1-1-409398b12e61@xxxxxxxxx
---
drivers/gpu/nova-core/gsp.rs | 34 +++++++++++++++++++++++-----------
drivers/gpu/nova-core/gsp/cmdq.rs | 20 +++++++++++++++-----
2 files changed, 38 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
index 174feaca0a6b9269cf35286dec3acc4d60918904..7dc67fd55ce6ce19cbb750961dcfb4e373a20b4c 100644
--- a/drivers/gpu/nova-core/gsp.rs
+++ b/drivers/gpu/nova-core/gsp.rs
@@ -2,6 +2,8 @@
mod boot;
+use core::iter::Iterator;
+
use kernel::{
device,
dma::{
@@ -30,7 +32,7 @@
GspArgumentsPadded,
LibosMemoryRegionInitArgument, //
},
- num,
+ num, //
};
pub(crate) const GSP_PAGE_SHIFT: usize = 12;
@@ -47,16 +49,17 @@
unsafe impl<const NUM_ENTRIES: usize> AsBytes for PteArray<NUM_ENTRIES> {}
impl<const NUM_PAGES: usize> PteArray<NUM_PAGES> {
- /// Creates a new page table array mapping `NUM_PAGES` GSP pages starting at address `start`.
- fn new(start: DmaAddress) -> Result<Self> {
- let mut ptes = [0u64; NUM_PAGES];
- for (i, pte) in ptes.iter_mut().enumerate() {
+ /// Initializes the page table array mapping `NUM_PAGES` GSP pages starting at address `start`.
+ /// This is done "in-memory" without using the stack to avoid overflow, by writing one page at
+ /// a time to the memory region
+ fn init(&mut self, start: DmaAddress) -> Result {
+ for (i, pte) in self.0.iter_mut().enumerate() {
*pte = start
.checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT)
.ok_or(EOVERFLOW)?;
}
- Ok(Self(ptes))
+ Ok(())
}
}
@@ -86,16 +89,25 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
NUM_PAGES * GSP_PAGE_SIZE,
GFP_KERNEL | __GFP_ZERO,
)?);
- let ptes = PteArray::<NUM_PAGES>::new(obj.0.dma_handle())?;
+
+ let start_addr = obj.0.dma_handle();
// SAFETY: `obj` has just been created and we are its sole user.
- unsafe {
- // Copy the self-mapping PTE at the expected location.
+ let pte_region = unsafe {
obj.0
- .as_slice_mut(size_of::<u64>(), size_of_val(&ptes))?
- .copy_from_slice(ptes.as_bytes())
+ .as_slice_mut(size_of::<u64>(), NUM_PAGES * size_of::<u64>())?
};
+ // As in [`PteArray::init`], this is a one by one GSP Page write to the memory
+ // to avoid stack overflow when allocating the whole array at once.
+ for (i, chunk) in pte_region.chunks_exact_mut(size_of::<u64>()).enumerate() {
+ let pte_value = start_addr
+ .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT)
+ .ok_or(EOVERFLOW)?;
+
+ chunk.copy_from_slice(&pte_value.to_ne_bytes());
+ }
+
Ok(obj)
}
}
diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
index 46819a82a51adc58423502d9d45730923b843656..132342a1a6d843e999e2d0e4fbcc76bde2bd8652 100644
--- a/drivers/gpu/nova-core/gsp/cmdq.rs
+++ b/drivers/gpu/nova-core/gsp/cmdq.rs
@@ -23,7 +23,7 @@
transmute::{
AsBytes,
FromBytes, //
- },
+ }, //
};
use crate::{
@@ -34,10 +34,10 @@
MsgFunction,
MsgqRxHeader,
MsgqTxHeader, //
- },
+ }, //
PteArray,
GSP_PAGE_SHIFT,
- GSP_PAGE_SIZE, //
+ GSP_PAGE_SIZE,
},
num,
regs,
@@ -159,6 +159,7 @@ struct Msgq {
#[repr(C)]
struct GspMem {
/// Self-mapping page table entries.
+ // ptes: [u64; GSP_PAGE_SIZE / size_of::<u64>()],
ptes: PteArray<{ GSP_PAGE_SIZE / size_of::<u64>() }>,
/// CPU queue: the driver writes commands here, and the GSP reads them. It also contains the
/// write and read pointers that the CPU updates.
@@ -199,9 +200,18 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
const MSGQ_SIZE: u32 = num::usize_into_u32::<{ size_of::<Msgq>() }>();
const RX_HDR_OFF: u32 = num::usize_into_u32::<{ mem::offset_of!(Msgq, rx) }>();
- let gsp_mem =
+ let mut gsp_mem =
CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?;
- dma_write!(gsp_mem[0].ptes = PteArray::new(gsp_mem.dma_handle())?)?;
+
+ let start_address = gsp_mem.dma_handle();
+
+ // SAFETY: `gsp_mem` has just been created and we are its sole user.
+ let mem: &mut [GspMem] = unsafe { gsp_mem.as_slice_mut(0, 1)? };
+
+ // Borrowing the array from gsp_mem and writing directly to that in the init method of
+ // PteArray
+ mem[0].ptes.init(start_address)?;
+
dma_write!(gsp_mem[0].cpuq.tx = MsgqTxHeader::new(MSGQ_SIZE, RX_HDR_OFF, MSGQ_NUM_PAGES))?;
dma_write!(gsp_mem[0].cpuq.rx = MsgqRxHeader::new())?;
---
base-commit: cea7b66a80412e2a5b74627b89ae25f1d0110a4b
change-id: 20260212-drm-rust-next-beb92aee9d75
Best regards,
--
Tim Kovalenko <tim.kovalenko@xxxxxxxxx>