[PATCH v5 32/38] gpu: nova-core: Hopper/Blackwell: larger WPR2 (GSP) heap
From: John Hubbard
Date: Fri Feb 20 2026 - 21:16:13 EST
Hopper, Blackwell and later GPUs require a larger heap for WPR2.
Signed-off-by: John Hubbard <jhubbard@xxxxxxxxxx>
---
drivers/gpu/nova-core/fb.rs | 2 +-
drivers/gpu/nova-core/gsp/fw.rs | 74 ++++++++++++++++++++++++---------
2 files changed, 55 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs
index 8b3ba9c9f464..08e6dd815352 100644
--- a/drivers/gpu/nova-core/fb.rs
+++ b/drivers/gpu/nova-core/fb.rs
@@ -247,7 +247,7 @@ pub(crate) fn new(chipset: Chipset, bar: &Bar0, gsp_fw: &GspFirmware) -> Result<
let wpr2_heap = {
const WPR2_HEAP_DOWN_ALIGN: Alignment = Alignment::new::<SZ_1M>();
let wpr2_heap_size =
- gsp::LibosParams::from_chipset(chipset).wpr_heap_size(chipset, fb.end);
+ gsp::LibosParams::from_chipset(chipset).wpr_heap_size(chipset, fb.end)?;
let wpr2_heap_addr = (elf.start - wpr2_heap_size).align_down(WPR2_HEAP_DOWN_ALIGN);
FbRange(wpr2_heap_addr..(elf.start).align_down(WPR2_HEAP_DOWN_ALIGN))
diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
index 086153edfa86..7fa9d3b1a592 100644
--- a/drivers/gpu/nova-core/gsp/fw.rs
+++ b/drivers/gpu/nova-core/gsp/fw.rs
@@ -49,32 +49,52 @@ enum GspFwHeapParams {}
/// Minimum required alignment for the GSP heap.
const GSP_HEAP_ALIGNMENT: Alignment = Alignment::new::<{ 1 << 20 }>();
+// These constants override the generated bindings for architecture-specific heap sizing.
+// See Open RM: kgspCalculateGspFwHeapSize and related functions.
+//
+// 14MB for Hopper/Blackwell+.
+const GSP_FW_HEAP_PARAM_BASE_RM_SIZE_GH100: u64 = 14 * num::usize_as_u64(SZ_1M);
+// 142MB client alloc for ~188MB total.
+const GSP_FW_HEAP_PARAM_CLIENT_ALLOC_SIZE_GH100: u64 = 142 * num::usize_as_u64(SZ_1M);
+// Hopper/Blackwell+ minimum heap size: 170MB (88 + 12 + 70).
+// See Open RM: GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MIN_MB for the base 88MB,
+// plus Hopper+ additions in kgspCalculateGspFwHeapSize_GH100.
+const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MIN_MB_HOPPER: u64 = 170;
+
impl GspFwHeapParams {
/// Returns the amount of GSP-RM heap memory used during GSP-RM boot and initialization (up to
/// and including the first client subdevice allocation).
- fn base_rm_size(_chipset: Chipset) -> u64 {
- // TODO: this needs to be updated to return the correct value for Hopper+ once support for
- // them is added:
- // u64::from(bindings::GSP_FW_HEAP_PARAM_BASE_RM_SIZE_GH100)
- u64::from(bindings::GSP_FW_HEAP_PARAM_BASE_RM_SIZE_TU10X)
+ fn base_rm_size(chipset: Chipset) -> u64 {
+ use crate::gpu::Architecture;
+ match chipset.arch() {
+ Architecture::Hopper | Architecture::Blackwell => {
+ GSP_FW_HEAP_PARAM_BASE_RM_SIZE_GH100
+ }
+ _ => u64::from(bindings::GSP_FW_HEAP_PARAM_BASE_RM_SIZE_TU10X),
+ }
}
/// Returns the amount of heap memory required to support a single channel allocation.
- fn client_alloc_size() -> u64 {
- u64::from(bindings::GSP_FW_HEAP_PARAM_CLIENT_ALLOC_SIZE)
- .align_up(GSP_HEAP_ALIGNMENT)
- .unwrap_or(u64::MAX)
+ fn client_alloc_size(chipset: Chipset) -> Result<u64> {
+ use crate::gpu::Architecture;
+ let size = match chipset.arch() {
+ Architecture::Hopper | Architecture::Blackwell => {
+ GSP_FW_HEAP_PARAM_CLIENT_ALLOC_SIZE_GH100
+ }
+ _ => u64::from(bindings::GSP_FW_HEAP_PARAM_CLIENT_ALLOC_SIZE),
+ };
+ size.align_up(GSP_HEAP_ALIGNMENT).ok_or(EINVAL)
}
/// Returns the amount of memory to reserve for management purposes for a framebuffer of size
/// `fb_size`.
- fn management_overhead(fb_size: u64) -> u64 {
+ fn management_overhead(fb_size: u64) -> Result<u64> {
let fb_size_gb = fb_size.div_ceil(u64::from_safe_cast(kernel::sizes::SZ_1G));
u64::from(bindings::GSP_FW_HEAP_PARAM_SIZE_PER_GB_FB)
.saturating_mul(fb_size_gb)
.align_up(GSP_HEAP_ALIGNMENT)
- .unwrap_or(u64::MAX)
+ .ok_or(EINVAL)
}
}
@@ -106,29 +126,43 @@ impl LibosParams {
* num::usize_as_u64(SZ_1M),
};
+ /// Hopper/Blackwell+ GPUs need a larger minimum heap size than the bindings specify.
+ /// The r570 bindings set LIBOS3_BAREMETAL_MIN_MB to 88MB, but Hopper/Blackwell+ actually
+ /// requires 170MB (88 + 12 + 70).
+ const LIBOS_HOPPER: LibosParams = LibosParams {
+ carveout_size: num::u32_as_u64(bindings::GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS3_BAREMETAL),
+ allowed_heap_size: GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MIN_MB_HOPPER
+ * num::usize_as_u64(SZ_1M)
+ ..num::u32_as_u64(bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MAX_MB)
+ * num::usize_as_u64(SZ_1M),
+ };
+
/// Returns the libos parameters corresponding to `chipset`.
pub(crate) fn from_chipset(chipset: Chipset) -> &'static LibosParams {
- if chipset < Chipset::GA102 {
- &Self::LIBOS2
- } else {
- &Self::LIBOS3
+ use crate::gpu::Architecture;
+ match chipset.arch() {
+ Architecture::Turing => &Self::LIBOS2,
+ Architecture::Ampere if chipset == Chipset::GA100 => &Self::LIBOS2,
+ Architecture::Ampere | Architecture::Ada => &Self::LIBOS3,
+ Architecture::Hopper | Architecture::Blackwell => &Self::LIBOS_HOPPER,
}
}
/// Returns the amount of memory (in bytes) to allocate for the WPR heap for a framebuffer size
/// of `fb_size` (in bytes) for `chipset`.
- pub(crate) fn wpr_heap_size(&self, chipset: Chipset, fb_size: u64) -> u64 {
+ pub(crate) fn wpr_heap_size(&self, chipset: Chipset, fb_size: u64) -> Result<u64> {
// The WPR heap will contain the following:
// LIBOS carveout,
- self.carveout_size
+ Ok(self
+ .carveout_size
// RM boot working memory,
.saturating_add(GspFwHeapParams::base_rm_size(chipset))
// One RM client,
- .saturating_add(GspFwHeapParams::client_alloc_size())
+ .saturating_add(GspFwHeapParams::client_alloc_size(chipset)?)
// Overhead for memory management.
- .saturating_add(GspFwHeapParams::management_overhead(fb_size))
+ .saturating_add(GspFwHeapParams::management_overhead(fb_size)?)
// Clamp to the supported heap sizes.
- .clamp(self.allowed_heap_size.start, self.allowed_heap_size.end - 1)
+ .clamp(self.allowed_heap_size.start, self.allowed_heap_size.end - 1))
}
}
--
2.53.0