Re: [PATCH v13 net-next 08/11] net/nebula-matrix: add vsi resource implementation

From: Jakub Kicinski

Date: Thu Apr 30 2026 - 19:48:06 EST


This is an AI-generated review of your patch. The human sending this
email has considered the AI review valid, or at least plausible.
---
net/nebula-matrix: add vsi resource implementation

This patch adds the VSI resource implementation and chip initialization
for the DP, intf, and P4 register modules, covering sub-modules such as
dped, uped, dsch, ustore, dstore, dvn, uvn, and uqm. The new code
populates bitfield structs and writes them to MMIO registers via
nbl_hw_wr_regs().

> diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.c b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.c
> index aa5e91c2b278..747273217203 100644
> --- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.c
> +++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.c

[ ... ]

> +static void nbl_configure_dped_checksum(struct nbl_hw_mgt *hw_mgt)
> +{
> + union dped_l4_ck_cmd_40_u l4_ck_cmd_40;
> +
> + /* DPED dped_l4_ck_cmd_40 for sctp */
> + nbl_hw_rd_regs(hw_mgt, NBL_DPED_L4_CK_CMD_40_ADDR, (u32 *)&l4_ck_cmd_40,
> + sizeof(l4_ck_cmd_40));
> + l4_ck_cmd_40.info.en = 1;
> + nbl_hw_wr_regs(hw_mgt, NBL_DPED_L4_CK_CMD_40_ADDR, (u32 *)&l4_ck_cmd_40,
> + sizeof(l4_ck_cmd_40));
> +}

Can this code produce a different bit pattern in the device register
depending on the host endianness?

The pattern used throughout the new init code is to populate C bitfield
structs and then cast them to u32 * and hand them to nbl_hw_wr_regs(),
which ultimately reaches writel(). The C standard leaves bitfield
placement within a storage unit implementation-defined, and GCC places
u32 f:N fields starting from the opposite end depending on the target
endianness. writel() performs a CPU-to-little-endian conversion on the
whole u32, so the register contents seen by the device differ between
little-endian and big-endian kernels compiled from the same source.

> +static void nbl_uped_init(struct nbl_hw_mgt *hw_mgt)
> +{
> + struct ped_hw_edit_profile hw_edit;
> +
> + nbl_hw_rd_regs(hw_mgt, NBL_UPED_HW_EDT_PROF_TABLE(NBL_DPED_V4_TCP_IDX),
> + (u32 *)&hw_edit, sizeof(hw_edit));
> + hw_edit.l3_len = 0;
> + nbl_hw_wr_regs(hw_mgt, NBL_UPED_HW_EDT_PROF_TABLE(NBL_DPED_V4_TCP_IDX),
> + (u32 *)&hw_edit, sizeof(hw_edit));
> +
> + nbl_hw_rd_regs(hw_mgt, NBL_UPED_HW_EDT_PROF_TABLE(NBL_DPED_V6_TCP_IDX),
> + (u32 *)&hw_edit, sizeof(hw_edit));
> + hw_edit.l3_len = 1;
> + nbl_hw_wr_regs(hw_mgt, NBL_UPED_HW_EDT_PROF_TABLE(NBL_DPED_V6_TCP_IDX),
> + (u32 *)&hw_edit, sizeof(hw_edit));
> +}

[ ... ]

> +static void nbl_shaping_eth_init(struct nbl_hw_mgt *hw_mgt, u8 eth_id, u8 speed)
> +{
> + struct nbl_shaping_dvn_dport dvn_dport = { 0 };
> + struct nbl_shaping_dport dport = { 0 };
> + u32 rate, half_rate;
> +
> + if (speed == NBL_FW_PORT_SPEED_100G) {
> + rate = NBL_SHAPING_DPORT_100G_RATE;
> + half_rate = NBL_SHAPING_DPORT_HALF_100G_RATE;
> + } else {
> + rate = NBL_SHAPING_DPORT_25G_RATE;
> + half_rate = NBL_SHAPING_DPORT_HALF_25G_RATE;
> + }
> +
> + dport.cir = rate;
> + dport.pir = rate;
> + dport.depth = max(dport.cir * 2, NBL_LR_LEONIS_NET_BUCKET_DEPTH);
> + dport.cbs = dport.depth;
> + dport.pbs = dport.depth;
> + dport.valid = 1;
> +
> + dvn_dport.cir = half_rate;
> + dvn_dport.pir = rate;
> + dvn_dport.depth = dport.depth;
> + dvn_dport.cbs = dvn_dport.depth;
> + dvn_dport.pbs = dvn_dport.depth;
> + dvn_dport.valid = 1;
> +
> + nbl_hw_wr_regs(hw_mgt, NBL_SHAPING_DPORT_REG(eth_id), (u32 *)&dport,
> + sizeof(dport));
> + nbl_hw_wr_regs(hw_mgt, NBL_SHAPING_DVN_DPORT_REG(eth_id),
> + (u32 *)&dvn_dport, sizeof(dvn_dport));
> +}

The same pattern appears in nbl_shaping_eth_init(), nbl_dsch_qid_max_init(),
nbl_ustore_init(), nbl_dstore_init(), nbl_dvn_descreq_num_cfg(),
nbl_dvn_init(), nbl_uvn_init(), nbl_uqm_init(), and
nbl_configure_dped_checksum().

[ ... ]

> +static void nbl_dvn_descreq_num_cfg(struct nbl_hw_mgt *hw_mgt, u32 descreq_num)
> +{
> + u32 split_ring_num = (descreq_num >> 16) & 0xffff;
> + struct nbl_dvn_descreq_num_cfg num_cfg = { 0 };
> + u32 packet_ring_num = descreq_num & 0xffff;
> +
> + packet_ring_num =
> + clamp(packet_ring_num, PACKET_RING_MIN, PACKET_RING_MAX);
> + num_cfg.packed_l1_num =
> + (packet_ring_num - PACKET_RING_BASE) / PACKET_RING_DIV;
> +
> + split_ring_num = clamp(split_ring_num, SPLIT_RING_MIN,
> + SPLIT_RING_MAX);
> + num_cfg.avring_cfg_num = split_ring_num > SPLIT_RING_MIN ?
> + SPLIT_RING_CFG_16 :
> + SPLIT_RING_CFG_8;
> +
> + nbl_hw_wr_regs(hw_mgt, NBL_DVN_DESCREQ_NUM_CFG, (u32 *)&num_cfg,
> + sizeof(num_cfg));
> +}

Taking struct nbl_dvn_descreq_num_cfg as one example:

struct nbl_dvn_descreq_num_cfg {
u32 avring_cfg_num:1;
u32 rsv0:3;
u32 packed_l1_num:3;
u32 rsv1:25;
};

On a little-endian build, avring_cfg_num ends up in bit 0 of the u32 sent
to the device. On a big-endian build the same field ends up in bit 31 of
the u32 that writel() then byte-swaps to little-endian for the bus,
producing a different pattern at the device. The same concern applies to
struct uvn_desc_prefetch_init, struct uvn_dif_req_ro_flag,
struct dsch_vn_quanta, struct nbl_shaping_dport,
struct nbl_shaping_dvn_dport, struct dstore_disc_bp_th,
struct dstore_port_drop_th, struct dstore_d_dport_fc_th,
struct ustore_pkt_len, struct nbl_ustore_port_drop_th,
struct ped_hw_edit_profile, struct nbl_uqm_que_type, and
union dped_l4_ck_cmd_40_u.

[ ... ]

> +static void nbl_uvn_init(struct nbl_hw_mgt *hw_mgt)
> +{
> + struct uvn_desc_prefetch_init prefetch_init = { 0 };
> + struct uvn_desc_wr_timeout desc_wr_timeout = { 0 };
> + struct uvn_dif_req_ro_flag flag = { 0 };
> + struct uvn_queue_err_mask mask = { 0 };
[ ... ]
> + nbl_hw_wr_regs(hw_mgt, NBL_UVN_DESC_PREFETCH_INIT,
> + (u32 *)&prefetch_init, sizeof(prefetch_init));
> +}

The driver Kconfig uses depends on PCI && (64BIT || COMPILE_TEST), which
permits ppc64be and s390x builds, and there is no BUILD_BUG_ON or
__LITTLE_ENDIAN_BITFIELD / __BIG_ENDIAN_BITFIELD guarding on any of these
structs.

Would it make sense to either add __LITTLE_ENDIAN_BITFIELD /
__BIG_ENDIAN_BITFIELD variants for each of these register-layout structs,
replace the bitfields with explicit u32 masks and shifts, or restrict the
Kconfig to little-endian hosts?