Re: [PATCH v5 net-next 05/11] net/nebula-matrix: add channel layer

From: Andrew Lunn

Date: Fri Feb 27 2026 - 18:32:18 EST


> + /* wmb */
> + wmb();

What does the comment tell me which the function name does not?

What would be more interesting in the comment is what is actually
critical to be flushed.

> +static int nbl_chan_kick_tx_ring(struct nbl_channel_mgt *chan_mgt,
> + struct nbl_chan_info *chan_info)
> +{
> + struct nbl_hw_ops *hw_ops = chan_mgt->hw_ops_tbl->ops;
> + struct nbl_chan_ring *txq = &chan_info->txq;
> + struct device *dev = chan_mgt->common->dev;
> + struct nbl_chan_tx_desc *tx_desc;
> + int i = 0;
> +
> + /* mb for tx notify */
> + mb();

This comment is also not so useful. Why is a memory barrier needed
here?

> + NBL_UPDATE_QUEUE_TAIL_PTR(chan_info, hw_ops, chan_mgt, txq->tail_ptr,
> + NBL_MB_TX_QID);
> +
> + tx_desc = NBL_CHAN_TX_RING_TO_DESC(txq, txq->next_to_clean);
> +
> + while (!(tx_desc->flags & NBL_CHAN_TX_DESC_USED)) {
> + udelay(NBL_CHAN_TX_WAIT_US);
> + i++;
> +
> + if (!(i % NBL_CHAN_TX_REKICK_WAIT_TIMES))
> + NBL_UPDATE_QUEUE_TAIL_PTR(chan_info, hw_ops, chan_mgt,
> + txq->tail_ptr, NBL_MB_TX_QID);
> +
> + if (i == NBL_CHAN_TX_WAIT_TIMES) {
> + dev_err(dev, "chan send message type: %d timeout\n",
> + tx_desc->msg_type);
> + return -EAGAIN;

Would -ETIMEDOUT be better here? What does it actually mean? That the
firmware is dead?

> +static int nbl_chan_get_msg_id(struct nbl_chan_info *chan_info,
> + union nbl_chan_msg_id *msgid)
> +{
> + struct nbl_chan_waitqueue_head *wait = NULL;
> + int valid_loc = chan_info->wait_head_index, i;
> +
> + for (i = 0; i < NBL_CHAN_QUEUE_LEN; i++) {
> + wait = &chan_info->wait[valid_loc];
> +
> + if (wait->status != NBL_MBX_STATUS_WAITING) {
> + wait->msg_index = NBL_NEXT_ID(wait->msg_index,
> + NBL_CHAN_MSG_INDEX_MAX);
> + msgid->info.index = wait->msg_index;
> + msgid->info.loc = valid_loc;
> +
> + valid_loc = NBL_NEXT_ID(valid_loc,
> + chan_info->num_txq_entries - 1);
> + chan_info->wait_head_index = valid_loc;
> + return 0;
> + }
> +
> + valid_loc =
> + NBL_NEXT_ID(valid_loc, chan_info->num_txq_entries - 1);
> + }
> +
> + return -ENOSPC;

man 3 errno:

ENOSPC No space left on device (POSIX.1-2001).

Seems like something a file system would return. Why would this error
happen?

> +}
> +
> +static int nbl_chan_send_msg(struct nbl_channel_mgt *chan_mgt,
> + struct nbl_chan_send_info *chan_send)
> +{
> + struct nbl_chan_info *chan_info = NBL_CHAN_MGT_TO_MBX(chan_mgt);
> + struct nbl_common_info *common = chan_mgt->common;
> + struct nbl_chan_waitqueue_head *wait_head;
> + union nbl_chan_msg_id msgid = { { 0 } };
> + struct nbl_chan_tx_param tx_param = { 0 };
> + int i = NBL_CHAN_TX_WAIT_ACK_TIMES, ret;
> + struct device *dev = common->dev;
> +
> + if (test_bit(NBL_CHAN_ABNORMAL, chan_info->state))
> + return -EFAULT;

EFAULT Bad address (POSIX.1-2001).

This is about memory address, unexpected page faults, access outside
of the processes memory space.

> +#define NBL_CHAN_TX_RING_TO_DESC(tx_ring, i) \
> + (&(((struct nbl_chan_tx_desc *)((tx_ring)->desc))[i]))
> +#define NBL_CHAN_RX_RING_TO_DESC(rx_ring, i) \
> + (&(((struct nbl_chan_rx_desc *)((rx_ring)->desc))[i]))

Are the casts needed here?

> +struct nbl_chan_tx_desc {
> + u16 flags;
> + u16 srcid;
> + u16 dstid;
> + u16 data_len;

> + u16 buf_len;

> + u64 buf_addr;
> + u16 msg_type;
> + u8 data[16];
> + u16 msgid;
> + u8 rsv[26];
> +} __packed;

This is a hardware descriptor? It is actually in memory like this,
with the u64 buf_addr badly aligned? What was the hardware engineer
thinking? Just swapping buf_addr and buf_len would of made it a lot
better.

> +struct nbl_chan_rx_desc {
> + u16 flags;
> + u32 buf_len;
> + u16 buf_id;
> + u64 buf_addr;
> +} __packed;

Similarly badly designed.

> +++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.c
> @@ -5,8 +5,168 @@
> */
>
> #include "nbl_hw_leonis.h"
> +static inline void nbl_hw_read_mbx_regs(struct nbl_hw_mgt *hw_mgt, u64 reg,
> + u8 *data, u32 len)
> +{

No inline functions in .c files. Let the compiler decide.

> +static void nbl_hw_rd_regs(struct nbl_hw_mgt *hw_mgt, u64 reg, u8 *data,
> + u32 len)
> +{
> + u32 size = len / 4;
> + u32 i = 0;
> +
> + if (len % 4)
> + return;
> +
> + spin_lock(&hw_mgt->reg_lock);
> +
> + for (i = 0; i < size; i++)
> + *(u32 *)(data + i * sizeof(u32)) =
> + rd32(hw_mgt->hw_addr, reg + i * sizeof(u32));
> + spin_unlock(&hw_mgt->reg_lock);
> +}

> +static u32 nbl_hw_get_host_pf_mask(struct nbl_hw_mgt *hw_mgt)
> +{
> + u32 data;
> +
> + nbl_hw_rd_regs(hw_mgt, NBL_PCIE_HOST_K_PF_MASK_REG, (u8 *)&data,
> + sizeof(data));
> + return data;

More code with dubious casts. data is a u32, which you cast to u8 for
the call, and nbl_hw_rd_regs() casts it back to u32.