[PATCH v4 13/20] drm/tyr: add firmware loading and MCU boot support

From: Deborah Brouwer

Date: Fri Apr 24 2026 - 19:45:33 EST


Add firmware loading and management for the Mali CSF GPU. This introduces
the fw module that loads the Mali GPU firmware binary, parses it into
sections, and maps those sections into the MCU VM at the required
virtual addresses.

On probe, the firmware is loaded, the MCU VM is activated, the firmware
sections are mapped and populated, and the MCU is booted.

Co-developed-by: Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx>
Signed-off-by: Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx>
Signed-off-by: Deborah Brouwer <deborah.brouwer@xxxxxxxxxxxxx>
---
drivers/gpu/drm/tyr/Kconfig | 1 +
drivers/gpu/drm/tyr/driver.rs | 16 ++-
drivers/gpu/drm/tyr/fw.rs | 272 ++++++++++++++++++++++++++++++++++++++++++
drivers/gpu/drm/tyr/gem.rs | 3 -
drivers/gpu/drm/tyr/mmu.rs | 1 -
drivers/gpu/drm/tyr/slot.rs | 1 -
drivers/gpu/drm/tyr/tyr.rs | 1 +
drivers/gpu/drm/tyr/vm.rs | 1 -
8 files changed, 289 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/tyr/Kconfig b/drivers/gpu/drm/tyr/Kconfig
index 443ce988b570..729643f4db49 100644
--- a/drivers/gpu/drm/tyr/Kconfig
+++ b/drivers/gpu/drm/tyr/Kconfig
@@ -18,6 +18,7 @@ config DRM_TYR
select DRM_TYR_STATIC_DEPS
select IOMMU_IO_PGTABLE_LPAE
select RUST_DRM_GEM_SHMEM_HELPER
+ select RUST_FW_LOADER_ABSTRACTIONS
depends on IOMMU_SUPPORT
default n
help
diff --git a/drivers/gpu/drm/tyr/driver.rs b/drivers/gpu/drm/tyr/driver.rs
index 495021a8657d..246bc3cb8580 100644
--- a/drivers/gpu/drm/tyr/driver.rs
+++ b/drivers/gpu/drm/tyr/driver.rs
@@ -42,6 +42,7 @@

use crate::{
file::TyrDrmFileData,
+ fw::Firmware,
gem::BoData,
gpu,
gpu::GpuInfo,
@@ -63,6 +64,8 @@
pub(crate) struct TyrDrmDeviceData {
pub(crate) pdev: ARef<platform::Device>,

+ pub(crate) fw: Arc<Firmware>,
+
#[pin]
clks: Mutex<Clocks>,

@@ -154,10 +157,21 @@ fn probe(
let uninit_ddev = UnregisteredDevice::<TyrDrmDriver>::new(pdev.as_ref())?;
let platform: ARef<platform::Device> = pdev.into();

- let _mmu = Mmu::new(pdev, iomem.as_arc_borrow(), &gpu_info)?;
+ let mmu = Mmu::new(pdev, iomem.as_arc_borrow(), &gpu_info)?;
+
+ let firmware = Firmware::new(
+ pdev,
+ iomem.clone(),
+ &uninit_ddev,
+ mmu.as_arc_borrow(),
+ &gpu_info,
+ )?;
+
+ firmware.boot()?;

let data = try_pin_init!(TyrDrmDeviceData {
pdev: platform.clone(),
+ fw: firmware,
clks <- new_mutex!(Clocks {
core: core_clk,
stacks: stacks_clk,
diff --git a/drivers/gpu/drm/tyr/fw.rs b/drivers/gpu/drm/tyr/fw.rs
new file mode 100644
index 000000000000..cb2546350f0a
--- /dev/null
+++ b/drivers/gpu/drm/tyr/fw.rs
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+
+//! Firmware loading and management for Mali CSF GPU.
+//!
+//! This module handles loading the Mali GPU firmware binary, parsing it into sections,
+//! and mapping those sections into the MCU's virtual address space. Each firmware section
+//! has specific properties (read/write/execute permissions, cache modes) and must be loaded
+//! at specific virtual addresses expected by the MCU.
+//!
+//! See [`Firmware`] for the main firmware management interface and [`Section`] for
+//! individual firmware sections.
+//!
+//! [`Firmware`]: crate::fw::Firmware
+//! [`Section`]: crate::fw::Section
+
+use kernel::{
+ bits::genmask_u32,
+ devres::Devres,
+ drm::{
+ gem::BaseObject,
+ Uninit, //
+ },
+ impl_flags,
+ io::{
+ poll,
+ Io, //
+ },
+ platform,
+ prelude::*,
+ str::CString,
+ sync::{
+ Arc,
+ ArcBorrow, //
+ },
+ time,
+ types::ARef, //
+};
+
+use crate::{
+ driver::{
+ IoMem,
+ TyrDrmDevice, //
+ },
+ fw::parser::{
+ FwParser,
+ ParsedSection, //
+ },
+ gem,
+ gem::{
+ KernelBo,
+ KernelBoVaAlloc, //
+ },
+ gpu::GpuInfo,
+ mmu::Mmu,
+ regs::gpu_control::{
+ McuControlMode,
+ McuStatus,
+ GPU_ID,
+ MCU_CONTROL,
+ MCU_STATUS, //
+ },
+ vm::Vm, //
+};
+
+mod parser;
+
+impl_flags!(
+ #[derive(Debug, Clone, Default, Copy, PartialEq, Eq)]
+ pub(super) struct SectionFlags(u32); // Combination of `SectionFlag` bits held in a raw `u32`.
+
+ #[derive(Debug, Clone, Copy, PartialEq, Eq)]
+ pub(super) enum SectionFlag {
+ Read = 1 << 0,
+ Write = 1 << 1,
+ Exec = 1 << 2,
+ CacheModeNone = 0 << 3, // Evaluates to 0: the cache-mode field with no bit set.
+ CacheModeCached = 1 << 3, // Cache mode is a 2-bit field in bits 3..=4 (CACHE_MODE_MASK).
+ CacheModeUncachedCoherent = 2 << 3,
+ CacheModeCachedCoherent = 3 << 3, // Both cache-mode bits set.
+ Prot = 1 << 5,
+ Shared = 1 << 30,
+ Zero = 1 << 31,
+ }
+);
+
+/// Mask selecting the cache-mode field (bits 3..=4) of a [`SectionFlags`] value.
+pub(super) const CACHE_MODE_MASK: SectionFlags = SectionFlags(genmask_u32(3..=4));
+
+/// Start of the MCU shared region.
+///
+/// NOTE(review): presumably an MCU virtual address; confirm against the users of this constant.
+pub(super) const CSF_MCU_SHARED_REGION_START: u32 = 0x04000000;
+
+impl SectionFlags {
+ /// Returns these flags with everything but the cache-mode field cleared.
+ fn cache_mode(&self) -> Self {
+ let flags = *self;
+ flags & CACHE_MODE_MASK
+ }
+}
+
+impl TryFrom<u32> for SectionFlags {
+ type Error = Error;
+
+ /// Converts a raw flags word into [`SectionFlags`].
+ ///
+ /// Returns `EINVAL` when `value` has a bit set that is neither one of the known
+ /// [`SectionFlag`] bits nor part of the cache-mode field.
+ fn try_from(value: u32) -> Result<Self, Self::Error> {
+ // Start from the cache-mode field and add every single-bit flag to it.
+ let known = [
+ SectionFlag::Read,
+ SectionFlag::Write,
+ SectionFlag::Exec,
+ SectionFlag::Prot,
+ SectionFlag::Shared,
+ SectionFlag::Zero,
+ ]
+ .into_iter()
+ .fold(CACHE_MODE_MASK, |acc, flag| acc | SectionFlags::from(flag));
+
+ let unknown_bits = value & !known.0;
+ if unknown_bits != 0 {
+ return Err(EINVAL);
+ }
+
+ Ok(Self(value))
+ }
+}
+
+/// One firmware section: a copy of its bytes plus the BO that maps it into the MCU VM.
+struct Section {
+ // Copy of the section's bytes taken from the firmware image, kept for reset purposes.
+ #[expect(dead_code)]
+ data: KVec<u8>,
+
+ // Never read: owning the BO backing this section is what keeps both its GPU mapping
+ // and its CPU mapping valid until the Section is dropped.
+ #[expect(dead_code)]
+ mem: gem::KernelBo,
+}
+
+/// Loaded firmware whose sections are mapped into the MCU VM.
+pub(crate) struct Firmware {
+ /// Platform device reference; `boot` uses it to obtain the bound device needed to access `iomem`.
+ pdev: ARef<platform::Device>,
+
+ /// Device-managed I/O memory needed to access the GPU registers.
+ iomem: Arc<Devres<IoMem>>,
+
+ /// MCU VM; it is killed when the `Firmware` is dropped.
+ vm: Arc<Vm>,
+
+ /// Firmware sections; never read, held so that each section's BO stays alive.
+ #[expect(dead_code)]
+ sections: KVec<Section>,
+}
+
+impl Drop for Firmware {
+ fn drop(&mut self) {
+ // An AS slot keeps its own reference to the VM, so kill the VM here to break that cycle.
+ self.vm.kill();
+ }
+}
+
+impl Firmware {
+ /// Copies `data` into the start of the BO backing a firmware section.
+ ///
+ /// Does nothing when `data` is empty. Returns `EINVAL` when `data` is larger than the BO,
+ /// and propagates any error from mapping the BO or from writing through the mapping.
+ fn init_section_mem(mem: &mut KernelBo, data: &KVec<u8>) -> Result {
+ if data.is_empty() {
+ return Ok(());
+ }
+
+ // Check the size before mapping, so that an oversized section is rejected without
+ // first setting up a CPU mapping of the BO.
+ let size = mem.bo.size();
+ if data.len() > size {
+ pr_err!("fw section {} bigger than BO {}\n", data.len(), size);
+ return Err(EINVAL);
+ }
+
+ let vmap = mem.bo.vmap::<0>()?;
+
+ for (i, &byte) in data.iter().enumerate() {
+ vmap.try_write8(byte, i)?;
+ }
+
+ Ok(())
+ }
+
+ /// Requests the CSF firmware image for this GPU from the firmware loader.
+ ///
+ /// The path is built from the architecture major and minor numbers decoded from
+ /// `gpu_info.gpu_id`.
+ fn request(
+ ddev: &TyrDrmDevice<Uninit>,
+ gpu_info: &GpuInfo,
+ ) -> Result<kernel::firmware::Firmware> {
+ let gpu_id = GPU_ID::from_raw(gpu_info.gpu_id);
+ let arch_major = gpu_id.arch_major().get();
+ let arch_minor = gpu_id.arch_minor().get();
+
+ let path = CString::try_from_fmt(fmt!(
+ "arm/mali/arch{}.{}/mali_csffw.bin",
+ arch_major,
+ arch_minor
+ ))?;
+
+ kernel::firmware::Firmware::request(&path, ddev.as_ref())
+ }
+
+ /// Requests the firmware image and parses it into sections.
+ ///
+ /// Returns the image together with the parsed sections, so that the caller can still read
+ /// the section bytes from the image.
+ fn load(
+ ddev: &TyrDrmDevice<Uninit>,
+ gpu_info: &GpuInfo,
+ ) -> Result<(kernel::firmware::Firmware, KVec<ParsedSection>)> {
+ let fw = Self::request(ddev, gpu_info)?;
+ let parsed_sections = FwParser::new(fw.data()).parse()?;
+
+ Ok((fw, parsed_sections))
+ }
+
+ /// Load firmware and map sections into MCU VM.
+ ///
+ /// Creates the MCU VM, requests and parses the firmware image, activates the VM, and then
+ /// creates one [`KernelBo`] per parsed section at the section's VA, filled with the
+ /// section's bytes from the image.
+ ///
+ /// If mapping the sections fails after the VM has been activated, the VM is killed before
+ /// the error is returned, as `Drop` does for a fully constructed [`Firmware`].
+ pub(crate) fn new(
+ pdev: &platform::Device,
+ iomem: Arc<Devres<IoMem>>,
+ ddev: &TyrDrmDevice<Uninit>,
+ mmu: ArcBorrow<'_, Mmu>,
+ gpu_info: &GpuInfo,
+ ) -> Result<Arc<Firmware>> {
+ let vm = Vm::new(pdev, ddev, mmu, gpu_info)?;
+
+ let (fw, parsed_sections) = Self::load(ddev, gpu_info)?;
+
+ vm.activate()?;
+
+ // No `Firmware` exists yet, so its `Drop` cannot kill the VM for us on this error path.
+ let sections = match Self::map_sections(ddev, vm.as_arc_borrow(), &fw, parsed_sections) {
+ Ok(sections) => sections,
+ Err(e) => {
+ vm.kill();
+ return Err(e);
+ }
+ };
+
+ let firmware = Arc::new(
+ Firmware {
+ pdev: pdev.into(),
+ iomem,
+ vm,
+ sections,
+ },
+ GFP_KERNEL,
+ )?;
+
+ Ok(firmware)
+ }
+
+ /// Creates, maps and fills one BO per parsed firmware section.
+ ///
+ /// Returns `EINVAL` when a section's VA range is inverted or when its data range lies
+ /// outside the firmware image.
+ fn map_sections(
+ ddev: &TyrDrmDevice<Uninit>,
+ vm: ArcBorrow<'_, Vm>,
+ fw: &kernel::firmware::Firmware,
+ parsed_sections: KVec<ParsedSection>,
+ ) -> Result<KVec<Section>> {
+ let fw_data = fw.data();
+ let mut sections = KVec::new();
+
+ for parsed in parsed_sections {
+ // The ranges come from the firmware file; the parser is not visible from here, so
+ // reject an inverted VA range instead of underflowing.
+ let size = parsed.va.end.checked_sub(parsed.va.start).ok_or(EINVAL)? as usize;
+ let va = u64::from(parsed.va.start);
+
+ let mut mem = KernelBo::new(
+ ddev,
+ vm,
+ // Propagate a failed conversion instead of panicking in the probe path.
+ size.try_into()?,
+ KernelBoVaAlloc::Explicit(va),
+ parsed.vm_map_flags,
+ )?;
+
+ let section_start = parsed.data_range.start as usize;
+ let section_end = parsed.data_range.end as usize;
+ let mut data = KVec::new();
+
+ // Ensure that the firmware slice is not out of bounds.
+ let bytes = fw_data.get(section_start..section_end).ok_or(EINVAL)?;
+ data.extend_from_slice(bytes, GFP_KERNEL)?;
+
+ Self::init_section_mem(&mut mem, &data)?;
+
+ sections.push(Section { data, mem }, GFP_KERNEL)?;
+ }
+
+ Ok(sections)
+ }
+
+ /// Starts the MCU and waits for it to report that it is enabled.
+ ///
+ /// Writes an `Auto` mode request to `MCU_CONTROL`, then polls `MCU_STATUS` until it reads
+ /// `Enabled`. If polling fails, the last status is logged and the polling error is
+ /// returned. Failure to access `iomem` is returned as well.
+ pub(crate) fn boot(&self) -> Result {
+ // SAFETY: Boot is currently only called in the probe path, so we're sure we have a bound
+ // device.
+ let dev = unsafe { self.pdev.as_ref().as_bound() };
+ let io = (self.iomem).access(dev)?;
+ io.write_reg(MCU_CONTROL::zeroed().with_req(McuControlMode::Auto));
+
+ if let Err(e) = poll::read_poll_timeout(
+ || Ok(io.read(MCU_STATUS)),
+ |status| status.value() == McuStatus::Enabled,
+ time::Delta::from_millis(1),
+ time::Delta::from_millis(100),
+ ) {
+ let status = io.read(MCU_STATUS);
+ // Kernel log messages must end with a newline, like the other `pr_err!` in this file.
+ pr_err!("MCU failed to boot, status: {:?}\n", status.value());
+ return Err(e);
+ }
+ Ok(())
+ }
+}
diff --git a/drivers/gpu/drm/tyr/gem.rs b/drivers/gpu/drm/tyr/gem.rs
index d032a8ae543f..4ec373e0bcfa 100644
--- a/drivers/gpu/drm/tyr/gem.rs
+++ b/drivers/gpu/drm/tyr/gem.rs
@@ -94,7 +94,6 @@ pub(crate) fn new_dummy_object<Ctx: DeviceContext>(ddev: &TyrDrmDevice<Ctx>) ->
/// a [`KernelBo`]. An automatic VA allocation strategy will be added in the future.
pub(crate) enum KernelBoVaAlloc {
/// Explicit VA address specified by the caller.
- #[expect(dead_code)]
Explicit(u64),
}

@@ -107,7 +106,6 @@ pub(crate) enum KernelBoVaAlloc {
/// When dropped, the buffer is automatically unmapped from the GPU VA space.
pub(crate) struct KernelBo {
/// The underlying GEM buffer object.
- #[expect(dead_code)]
pub(crate) bo: ARef<Bo>,
/// The GPU VM this buffer is mapped into.
vm: Arc<Vm>,
@@ -121,7 +119,6 @@ impl KernelBo {
/// This function allocates a new shmem-backed GEM object and immediately maps
/// it into the specified GPU virtual memory space. The mapping is automatically
/// cleaned up when the [`KernelBo`] is dropped.
- #[expect(dead_code)]
pub(crate) fn new<Ctx: DeviceContext>(
ddev: &TyrDrmDevice<Ctx>,
vm: ArcBorrow<'_, Vm>,
diff --git a/drivers/gpu/drm/tyr/mmu.rs b/drivers/gpu/drm/tyr/mmu.rs
index 09df98ffc9e3..935e2102ab30 100644
--- a/drivers/gpu/drm/tyr/mmu.rs
+++ b/drivers/gpu/drm/tyr/mmu.rs
@@ -12,7 +12,6 @@
//!
//! [`AddressSpaceManager`]: address_space::AddressSpaceManager
//! [`SlotManager`]: crate::slot::SlotManager
-#![allow(dead_code)]

use core::ops::Range;

diff --git a/drivers/gpu/drm/tyr/slot.rs b/drivers/gpu/drm/tyr/slot.rs
index debba75f6204..53abb9eeb970 100644
--- a/drivers/gpu/drm/tyr/slot.rs
+++ b/drivers/gpu/drm/tyr/slot.rs
@@ -20,7 +20,6 @@
//!
//! [SlotOperations]: crate::slot::SlotOperations
//! [SlotManager]: crate::slot::SlotManager
-#![allow(dead_code)]

use core::{
mem::take,
diff --git a/drivers/gpu/drm/tyr/tyr.rs b/drivers/gpu/drm/tyr/tyr.rs
index b3244670dd79..18b0668bb217 100644
--- a/drivers/gpu/drm/tyr/tyr.rs
+++ b/drivers/gpu/drm/tyr/tyr.rs
@@ -9,6 +9,7 @@

mod driver;
mod file;
+mod fw;
mod gem;
mod gpu;
mod mmu;
diff --git a/drivers/gpu/drm/tyr/vm.rs b/drivers/gpu/drm/tyr/vm.rs
index c19300d76194..1ef7e40ccdb5 100644
--- a/drivers/gpu/drm/tyr/vm.rs
+++ b/drivers/gpu/drm/tyr/vm.rs
@@ -6,7 +6,6 @@
//! the illusion of owning the entire virtual address (VA) range, similar to CPU virtual memory.
//! Each virtual memory (VM) area is backed by ARM64 LPAE Stage 1 page tables and can be
//! mapped into hardware address space (AS) slots for GPU execution.
-#![allow(dead_code)]

use core::ops::Range;


--
2.53.0