[PATCH v4 3/8] gpu: nova-core: gsp: add GspRpcError for Cmdq RPC error handling

From: Eliot Courtney

Date: Fri Apr 17 2026 - 11:40:47 EST


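Currently, the RPC status value in the message header is ignored, even
though it can indicate a failure reported by GSP-RM. Add a new error type,
`GspRpcError`, used by `Cmdq` to distinguish such GSP-RM failures from
transport errors and surface them to callers.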

Signed-off-by: Eliot Courtney <ecourtney@xxxxxxxxxx>
---
drivers/gpu/nova-core/gsp/cmdq.rs | 112 ++++++++++++++++++++++-----------
drivers/gpu/nova-core/gsp/commands.rs | 7 ++-
drivers/gpu/nova-core/gsp/fw.rs | 37 ++++++++++-
drivers/gpu/nova-core/gsp/sequencer.rs | 5 +-
4 files changed, 117 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
index 569bb1a2501c..501d01e2bedd 100644
--- a/drivers/gpu/nova-core/gsp/cmdq.rs
+++ b/drivers/gpu/nova-core/gsp/cmdq.rs
@@ -41,6 +41,7 @@
gsp::{
fw::{
GspMsgElement,
+ GspMsgRmError,
MsgFunction,
MsgqRxHeader,
MsgqTxHeader,
@@ -55,6 +56,24 @@
sbuffer::SBufferIter, //
};

+/// Error returned by GSP RPC operations.
+#[derive(Debug)]
+pub(crate) enum GspRpcError {
+ /// The command transport or reply decoding failed.
+ Transport(Error),
+ /// The GSP-RM RPC returned an RM-specific error status.
+ Rm(GspMsgRmError),
+}
+
+impl From<GspRpcError> for Error {
+ fn from(err: GspRpcError) -> Self {
+ match err {
+ GspRpcError::Transport(err) => err,
+ GspRpcError::Rm(status) => status.into(),
+ }
+ }
+}
+
/// Marker type representing the absence of a reply for a command. Commands using this as their
/// reply type are sent using [`Cmdq::send_command_no_wait`].
pub(crate) struct NoReply;
@@ -547,13 +566,14 @@ fn notify_gsp(bar: &Bar0) {
///
/// # Errors
///
- /// - `ETIMEDOUT` if space does not become available to send the command, or if the reply is
- /// not received within the timeout.
- /// - `EIO` if the variable payload requested by the command has not been entirely
+ /// - `Transport(ETIMEDOUT)` if space does not become available to send the command, or if the
+ /// reply is not received within the timeout.
+ /// - `Transport(EIO)` if the variable payload requested by the command has not been entirely
/// written to by its [`CommandToGsp::init_variable_payload`] method.
+ /// - `Rm(status)` if GSP-RM returned an error for this RPC.
///
- /// Error codes returned by the command and reply initializers are propagated as-is.
- pub(crate) fn send_command<M>(&self, bar: &Bar0, command: M) -> Result<M::Reply>
+ /// Error codes returned by the command and reply initializers are propagated as `Transport`.
+ pub(crate) fn send_command<M>(&self, bar: &Bar0, command: M) -> Result<M::Reply, GspRpcError>
where
M: CommandToGsp,
M::Reply: MessageFromGsp,
@@ -561,12 +581,14 @@ pub(crate) fn send_command<M>(&self, bar: &Bar0, command: M) -> Result<M::Reply>
Error: From<<M::Reply as MessageFromGsp>::InitError>,
{
let mut inner = self.inner.lock();
- inner.send_command(bar, command)?;
+ inner
+ .send_command(bar, command)
+ .map_err(GspRpcError::Transport)?;

loop {
match inner.receive_msg::<M::Reply>(Self::RECEIVE_TIMEOUT) {
Ok(reply) => break Ok(reply),
- Err(ERANGE) => continue,
+ Err(GspRpcError::Transport(ERANGE)) => continue,
Err(e) => break Err(e),
}
}
@@ -592,7 +614,7 @@ pub(crate) fn send_command_no_wait<M>(&self, bar: &Bar0, command: M) -> Result
/// Receive a message from the GSP.
///
/// See [`CmdqInner::receive_msg`] for details.
- pub(crate) fn receive_msg<M: MessageFromGsp>(&self, timeout: Delta) -> Result<M>
+ pub(crate) fn receive_msg<M: MessageFromGsp>(&self, timeout: Delta) -> Result<M, GspRpcError>
where
// This allows all error types, including `Infallible`, to be used for `M::InitError`.
Error: From<M::InitError>,
@@ -801,52 +823,68 @@ fn wait_for_msg(&self, timeout: Delta) -> Result<GspMessage<'_>> {
/// Receive a message from the GSP.
///
/// The expected message type is specified using the `M` generic parameter. If the pending
- /// message has a different function code, `ERANGE` is returned and the message is consumed.
+ /// message has a different function code, `Transport(ERANGE)` is returned and the message is
+ /// consumed.
///
/// The read pointer is always advanced past the message, regardless of whether it matched.
///
/// # Errors
///
- /// - `ETIMEDOUT` if `timeout` has elapsed before any message becomes available.
- /// - `EIO` if there was some inconsistency (e.g. message shorter than advertised) on the
- /// message queue.
- /// - `EINVAL` if the function code of the message was not recognized.
- /// - `ERANGE` if the message had a recognized but non-matching function code.
+ /// - `Transport(ETIMEDOUT)` if `timeout` has elapsed before any message becomes available.
+ /// - `Transport(EIO)` if there was some inconsistency (e.g. message shorter than advertised)
+ /// on the message queue.
+ /// - `Transport(EINVAL)` if the function code of the message was not recognized.
+ /// - `Transport(ERANGE)` if the message had a recognized but non-matching function code.
+ /// - `Rm(status)` if GSP-RM returned an error for this RPC.
///
- /// Error codes returned by [`MessageFromGsp::read`] are propagated as-is.
- fn receive_msg<M: MessageFromGsp>(&mut self, timeout: Delta) -> Result<M>
+ /// Error codes returned by [`MessageFromGsp::read`] are propagated as `Transport`.
+ fn receive_msg<M: MessageFromGsp>(&mut self, timeout: Delta) -> Result<M, GspRpcError>
where
// This allows all error types, including `Infallible`, to be used for `M::InitError`.
Error: From<M::InitError>,
{
- let message = self.wait_for_msg(timeout)?;
- let function = message.header.function().map_err(|_| EINVAL)?;
+ let message = self.wait_for_msg(timeout).map_err(GspRpcError::Transport)?;
+ let function = message
+ .header
+ .function()
+ .map_err(|_| GspRpcError::Transport(EINVAL))?;

// Extract the message. Store the result as we want to advance the read pointer even in
// case of failure.
- let result = if function == M::FUNCTION {
- let (cmd, contents_1) = M::Message::from_bytes_prefix(message.contents.0).ok_or(EIO)?;
+ let result = (|| -> Result<M, GspRpcError> {
+ message
+ .header
+ .status()
+ .map_err(GspRpcError::Transport)?
+ .log_if_warning(&self.dev, function)
+ .map_err(GspRpcError::Rm)?;
+
+ if function != M::FUNCTION {
+ return Err(GspRpcError::Transport(ERANGE));
+ }
+
+ let (cmd, contents_1) = M::Message::from_bytes_prefix(message.contents.0)
+ .ok_or(GspRpcError::Transport(EIO))?;
let mut sbuffer = SBufferIter::new_reader([contents_1, message.contents.1]);

- M::read(cmd, &mut sbuffer)
- .map_err(|e| e.into())
- .inspect(|_| {
- if !sbuffer.is_empty() {
- dev_warn!(
- &self.dev,
- "GSP message {:?} has unprocessed data\n",
- function
- );
- }
- })
- } else {
- Err(ERANGE)
- };
+ let msg = M::read(cmd, &mut sbuffer).map_err(|e| GspRpcError::Transport(e.into()))?;
+
+ if !sbuffer.is_empty() {
+ dev_warn!(
+ &self.dev,
+ "GSP message {:?} has unprocessed data\n",
+ function
+ );
+ }
+
+ Ok(msg)
+ })();

// Advance the read pointer past this message.
- self.gsp_mem.advance_cpu_read_ptr(u32::try_from(
- message.header.length().div_ceil(GSP_PAGE_SIZE),
- )?);
+ self.gsp_mem.advance_cpu_read_ptr(
+ u32::try_from(message.header.length().div_ceil(GSP_PAGE_SIZE))
+ .map_err(|_| GspRpcError::Transport(EINVAL))?,
+ );

result
}
diff --git a/drivers/gpu/nova-core/gsp/commands.rs b/drivers/gpu/nova-core/gsp/commands.rs
index c89c7b57a751..b8f64bfe9313 100644
--- a/drivers/gpu/nova-core/gsp/commands.rs
+++ b/drivers/gpu/nova-core/gsp/commands.rs
@@ -23,6 +23,7 @@
cmdq::{
Cmdq,
CommandToGsp,
+ GspRpcError,
MessageFromGsp,
NoReply, //
},
@@ -169,8 +170,8 @@ pub(crate) fn wait_gsp_init_done(cmdq: &Cmdq) -> Result {
loop {
match cmdq.receive_msg::<GspInitDone>(Cmdq::RECEIVE_TIMEOUT) {
Ok(_) => break Ok(()),
- Err(ERANGE) => continue,
- Err(e) => break Err(e),
+ Err(GspRpcError::Transport(ERANGE)) => continue,
+ Err(e) => break Err(e.into()),
}
}
}
@@ -234,6 +235,6 @@ pub(crate) fn gpu_name(&self) -> core::result::Result<&str, GpuNameError> {
}

/// Send the [`GetGspInfo`] command and awaits for its reply.
-pub(crate) fn get_gsp_info(cmdq: &Cmdq, bar: &Bar0) -> Result<GetGspStaticInfoReply> {
+pub(crate) fn get_gsp_info(cmdq: &Cmdq, bar: &Bar0) -> Result<GetGspStaticInfoReply, GspRpcError> {
cmdq.send_command(bar, GetGspStaticInfo)
}
diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
index a8d7c62af097..5fabb815a919 100644
--- a/drivers/gpu/nova-core/gsp/fw.rs
+++ b/drivers/gpu/nova-core/gsp/fw.rs
@@ -6,9 +6,13 @@
// Alias to avoid repeating the version number with every use.
use r570_144 as bindings;

-use core::ops::Range;
+use core::{
+ fmt,
+ ops::Range, //
+};

use kernel::{
+ device,
dma::Coherent,
prelude::*,
ptr::{
@@ -99,7 +103,6 @@ pub(in crate::gsp) fn advance_cpu_write_ptr(qs: &Coherent<GspMem>, count: u32) {

/// Status code returned by GSP-RM operations.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-#[expect(dead_code)]
pub(crate) enum GspMsgRmStatus {
/// The operation succeeded.
Ok,
@@ -605,6 +608,31 @@ fn try_from(value: u32) -> Result<Self> {
}
}

+impl GspMsgRmStatus {
+ /// Converts [`GspMsgRmStatus`] to a [`Result`], logging if the status is a warning.
+ ///
+ /// `rpc_name` identifies the RPC for the log message.
+ pub(super) fn log_if_warning(
+ self,
+ dev: &device::Device,
+ rpc_name: impl fmt::Debug,
+ ) -> Result<(), GspMsgRmError> {
+ match self {
+ Self::Ok => Ok(()),
+ Self::Warning(warning) => {
+ dev_warn!(
+ dev,
+ "GSP RPC {:?} returned warning {:?}\n",
+ rpc_name,
+ warning
+ );
+ Ok(())
+ }
+ Self::Error(status) => Err(status),
+ }
+ }
+}
+
/// Empty type to group methods related to heap parameters for running the GSP firmware.
enum GspFwHeapParams {}

@@ -1347,6 +1375,11 @@ pub(crate) fn function(&self) -> Result<MsgFunction, u32> {
.map_err(|_| self.inner.rpc.function)
}

+ /// Returns the RPC status from the message header.
+ pub(super) fn status(&self) -> Result<GspMsgRmStatus> {
+ self.inner.rpc.rpc_result.try_into()
+ }
+
// Returns the number of elements (i.e. memory pages) used by this message.
pub(crate) fn element_count(&self) -> u32 {
self.inner.elemCount
diff --git a/drivers/gpu/nova-core/gsp/sequencer.rs b/drivers/gpu/nova-core/gsp/sequencer.rs
index 474e4c8021db..672c7d6c3cf6 100644
--- a/drivers/gpu/nova-core/gsp/sequencer.rs
+++ b/drivers/gpu/nova-core/gsp/sequencer.rs
@@ -29,6 +29,7 @@
gsp::{
cmdq::{
Cmdq,
+ GspRpcError,
MessageFromGsp, //
},
fw,
@@ -360,8 +361,8 @@ pub(crate) fn run(cmdq: &Cmdq, params: GspSequencerParams<'a>) -> Result {
let seq_info = loop {
match cmdq.receive_msg::<GspSequence>(Cmdq::RECEIVE_TIMEOUT) {
Ok(seq_info) => break seq_info,
- Err(ERANGE) => continue,
- Err(e) => return Err(e),
+ Err(GspRpcError::Transport(ERANGE)) => continue,
+ Err(e) => return Err(e.into()),
}
};


--
2.53.0