[PATCH v4 4/4] gpu: nova-core: fix stack overflow in GSP memory allocation

From: Tim Kovalenko via B4 Relay

Date: Mon Mar 09 2026 - 12:37:11 EST


From: Tim Kovalenko <tim.kovalenko@xxxxxxxxx>

The `Cmdq::new` function allocated a `PteArray` struct on the stack,
resulting in a stack overflow (8216 bytes).

Modify `PteArray` so that the page table entries are calculated and
written directly into the coherent DMA buffer one by one, instead of
being built as a complete array on the stack first. This significantly
reduces stack usage.

Signed-off-by: Tim Kovalenko <tim.kovalenko@xxxxxxxxx>
---
drivers/gpu/nova-core/gsp.rs | 34 +++++++++++++++++++---------------
drivers/gpu/nova-core/gsp/cmdq.rs | 15 ++++++++++++++-
2 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
index 25cd48514c777cb405a2af0acf57196b2e2e7837..20170e483e04c476efce8997b3916b0ad829ed38 100644
--- a/drivers/gpu/nova-core/gsp.rs
+++ b/drivers/gpu/nova-core/gsp.rs
@@ -47,16 +47,11 @@
unsafe impl<const NUM_ENTRIES: usize> AsBytes for PteArray<NUM_ENTRIES> {}

impl<const NUM_PAGES: usize> PteArray<NUM_PAGES> {
- /// Creates a new page table array mapping `NUM_PAGES` GSP pages starting at address `start`.
- fn new(start: DmaAddress) -> Result<Self> {
- let mut ptes = [0u64; NUM_PAGES];
- for (i, pte) in ptes.iter_mut().enumerate() {
- *pte = start
- .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT)
- .ok_or(EOVERFLOW)?;
- }
-
- Ok(Self(ptes))
+ /// Returns the page table entry for `index`, for a mapping starting at DMA address `start`.
+ fn entry(start: DmaAddress, index: usize) -> Result<u64> {
+ start
+ .checked_add(num::usize_as_u64(index) << GSP_PAGE_SHIFT)
+ .ok_or(EOVERFLOW)
}
}

@@ -86,16 +81,25 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
NUM_PAGES * GSP_PAGE_SIZE,
GFP_KERNEL | __GFP_ZERO,
)?);
- let ptes = PteArray::<NUM_PAGES>::new(obj.0.dma_handle())?;
+
+ let start_addr = obj.0.dma_handle();

// SAFETY: `obj` has just been created and we are its sole user.
- unsafe {
- // Copy the self-mapping PTE at the expected location.
+ let pte_region = unsafe {
obj.0
- .as_slice_mut(size_of::<u64>(), size_of_val(&ptes))?
- .copy_from_slice(ptes.as_bytes())
+ .as_slice_mut(size_of::<u64>(), NUM_PAGES * size_of::<u64>())?
};

+ // Write the PTEs into the DMA buffer one at a time, to avoid the stack overflow
+ // caused by building the whole array on the stack first.
+ for (i, chunk) in pte_region.chunks_exact_mut(size_of::<u64>()).enumerate() {
+ let pte_value = start_addr
+ .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT)
+ .ok_or(EOVERFLOW)?;
+
+ chunk.copy_from_slice(&pte_value.to_ne_bytes());
+ }
+
Ok(obj)
}
}
diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
index 0056bfbf0a44cfbc5a0ca08d069f881b877e1edc..c8327d3098f73f9b880eee99038ad10a16e1e32d 100644
--- a/drivers/gpu/nova-core/gsp/cmdq.rs
+++ b/drivers/gpu/nova-core/gsp/cmdq.rs
@@ -202,7 +202,20 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {

let gsp_mem =
CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?;
- dma_write!(gsp_mem, [0]?.ptes, PteArray::new(gsp_mem.dma_handle())?);
+
+ const NUM_PTES: usize = GSP_PAGE_SIZE / size_of::<u64>();
+
+ let start = gsp_mem.dma_handle();
+ // Write the PTEs into the DMA buffer one at a time, to avoid the stack overflow
+ // caused by building the whole array on the stack first.
+ for i in 0..NUM_PTES {
+ dma_write!(
+ gsp_mem,
+ [0]?.ptes.0[i],
+ PteArray::<NUM_PTES>::entry(start, i)?
+ );
+ }
+
dma_write!(
gsp_mem,
[0]?.cpuq.tx,

--
2.53.0