Re: [PATCH v4 4/4] gpu: nova-core: fix stack overflow in GSP memory allocation
From: Danilo Krummrich
Date: Mon Mar 09 2026 - 15:46:00 EST
On Mon Mar 9, 2026 at 5:34 PM CET, Tim Kovalenko via B4 Relay wrote:
> From: Tim Kovalenko <tim.kovalenko@xxxxxxxxx>
>
> The `Cmdq::new` function allocated a `PteArray` struct on the stack,
> producing an 8216-byte stack frame and causing a stack overflow.
>
> Modify `PteArray` to calculate and write the page table entries directly
> into the coherent DMA buffer one by one. This significantly reduces the
> stack usage.
>
> Reported-by: Gary Guo <gary@xxxxxxxxxxx>
> Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/509436-Nova/topic/.60Cmdq.3A.3Anew.60.20uses.20excessive.20stack.20size/near/570375549
> Fixes: f38b4f105cfc ("gpu: nova-core: Create initial Gsp")
> Signed-off-by: Tim Kovalenko <tim.kovalenko@xxxxxxxxx>
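For context, such a PteArray temporary alone spans a full GSP page; a
minimal sketch of the arithmetic, assuming a 4 KiB GSP_PAGE_SIZE:

  use core::mem::size_of;

  const GSP_PAGE_SIZE: usize = 4096; // assumed for illustration
  const NUM_PTES: usize = GSP_PAGE_SIZE / size_of::<u64>(); // 512 u64 entries

  // One u64 per PTE, so the temporary occupies a whole page by itself:
  const _: () = assert!(NUM_PTES * size_of::<u64>() == GSP_PAGE_SIZE);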
A few nits below, but I can fix them up [1] on apply.
> + for (i, chunk) in pte_region.chunks_exact_mut(size_of::<u64>()).enumerate() {
> + let pte_value = start_addr
> + .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT)
> + .ok_or(EOVERFLOW)?;
This should use PteArray::entry().
It would also be nice to get rid of the unsafe {} and use dma_write!() instead,
but this can be a follow-up patch.
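Roughly, the follow-up could mirror the dma_write!() loop from the cmdq.rs
hunk below; a sketch only, assuming `obj` is the CoherentAllocation here and
its element type exposes the PTE array as a `ptes` field (names hypothetical):

  // Replaces the unsafe as_slice_mut() region with per-entry writes:
  for i in 0..NUM_PAGES {
      dma_write!(obj, [0]?.ptes.0[i], PteArray::<NUM_PAGES>::entry(start_addr, i)?);
  }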
> +
> + chunk.copy_from_slice(&pte_value.to_ne_bytes());
> + }
> +
> Ok(obj)
> }
> }
> diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
> index 0056bfbf0a44cfbc5a0ca08d069f881b877e1edc..c8327d3098f73f9b880eee99038ad10a16e1e32d 100644
> --- a/drivers/gpu/nova-core/gsp/cmdq.rs
> +++ b/drivers/gpu/nova-core/gsp/cmdq.rs
> @@ -202,7 +202,20 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
>
> let gsp_mem =
> CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?;
> - dma_write!(gsp_mem, [0]?.ptes, PteArray::new(gsp_mem.dma_handle())?);
> +
> + const NUM_PTES: usize = GSP_PAGE_SIZE / size_of::<u64>();
We can avoid duplicating this by making it a constant of GspMem.
> + let start = gsp_mem.dma_handle();
> + // Write the PTEs one by one to avoid a stack overflow when allocating
> + // the whole array at once.
> + for i in 0..NUM_PTES {
> + dma_write!(
> + gsp_mem,
> + [0]?.ptes.0[i],
> + PteArray::<NUM_PTES>::entry(start, i)?
> + );
> + }
-- [1] --
diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
index 20170e483e04..b5ea14b7dad7 100644
--- a/drivers/gpu/nova-core/gsp.rs
+++ b/drivers/gpu/nova-core/gsp.rs
@@ -48,6 +48,7 @@ unsafe impl<const NUM_ENTRIES: usize> AsBytes for PteArray<NUM_ENTRIES> {}
impl<const NUM_PAGES: usize> PteArray<NUM_PAGES> {
/// Returns the page table entry for `index`, for a mapping starting at `start` DmaAddress.
+ // TODO: Replace with `IoView` projection once available.
fn entry(start: DmaAddress, index: usize) -> Result<u64> {
start
.checked_add(num::usize_as_u64(index) << GSP_PAGE_SHIFT)
@@ -90,12 +91,9 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
.as_slice_mut(size_of::<u64>(), NUM_PAGES * size_of::<u64>())?
};
- // This is a one by one GSP Page write to the memory
- // to avoid stack overflow when allocating the whole array at once.
+ // Write values one by one to avoid an on-stack instance of `PteArray`.
for (i, chunk) in pte_region.chunks_exact_mut(size_of::<u64>()).enumerate() {
- let pte_value = start_addr
- .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT)
- .ok_or(EOVERFLOW)?;
+ let pte_value = PteArray::<NUM_PAGES>::entry(start_addr, i)?;
chunk.copy_from_slice(&pte_value.to_ne_bytes());
}
diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
index 9107a1473797..aa42d180f0d5 100644
--- a/drivers/gpu/nova-core/gsp/cmdq.rs
+++ b/drivers/gpu/nova-core/gsp/cmdq.rs
@@ -159,7 +159,7 @@ struct Msgq {
#[repr(C)]
struct GspMem {
/// Self-mapping page table entries.
- ptes: PteArray<{ GSP_PAGE_SIZE / size_of::<u64>() }>,
+ ptes: PteArray<{ Self::PTE_ARRAY_SIZE }>,
/// CPU queue: the driver writes commands here, and the GSP reads them. It also contains the
/// write and read pointers that the CPU updates.
///
@@ -172,6 +172,10 @@ struct GspMem {
gspq: Msgq,
}
+impl GspMem {
+ const PTE_ARRAY_SIZE: usize = GSP_PAGE_SIZE / size_of::<u64>();
+}
+
// SAFETY: These structs don't meet the no-padding requirements of AsBytes but
// that is not a problem because they are not used outside the kernel.
unsafe impl AsBytes for GspMem {}
@@ -202,16 +206,14 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
let gsp_mem =
CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?;
- const NUM_PTES: usize = GSP_PAGE_SIZE / size_of::<u64>();
-
let start = gsp_mem.dma_handle();
// Write the PTEs one by one to avoid a stack overflow when allocating
// the whole array at once.
- for i in 0..NUM_PTES {
+ for i in 0..GspMem::PTE_ARRAY_SIZE {
dma_write!(
gsp_mem,
[0]?.ptes.0[i],
- PteArray::<NUM_PTES>::entry(start, i)?
+ PteArray::<{ GspMem::PTE_ARRAY_SIZE }>::entry(start, i)?
);
}