Re: [PATCH V2 net-next] net: fec: add CBS offload support
From: Alexander Lobakin
Date: Mon Feb 13 2023 - 11:07:47 EST
From: Wei Fang <wei.fang@xxxxxxx>
Date: Mon, 13 Feb 2023 17:29:12 +0800
> From: Wei Fang <wei.fang@xxxxxxx>
>
> The FEC hardware supports the Credit-based shaper (CBS) which control
> the bandwidth distribution between normal traffic and time-sensitive
> traffic with respect to the total link bandwidth available.
> But notice that the bandwidth allocation of hardware is restricted to
> certain values. Below is the equation which is used to calculate the
> BW (bandwidth) fraction for per class:
> BW fraction = 1 / (1 + 512 / idle_slope)
[...]
> @@ -571,6 +575,12 @@ struct fec_stop_mode_gpr {
> u8 bit;
> };
>
> +struct fec_cbs_params {
> + bool enable[FEC_ENET_MAX_TX_QS];
Maybe smth like
DECLARE_BITMAP(enabled, FEC_ENET_MAX_TX_QS);
?
> + int idleslope[FEC_ENET_MAX_TX_QS];
> + int sendslope[FEC_ENET_MAX_TX_QS];
Can they actually be negative? (probably I'll see it below)
> +};
> +
> /* The FEC buffer descriptors track the ring buffers. The rx_bd_base and
> * tx_bd_base always point to the base of the buffer descriptors. The
> * cur_rx and cur_tx point to the currently available buffer.
> @@ -679,6 +689,9 @@ struct fec_enet_private {
> /* XDP BPF Program */
> struct bpf_prog *xdp_prog;
>
> + /* CBS parameters */
> + struct fec_cbs_params cbs;
> +
> u64 ethtool_stats[];
> };
>
> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
> index c73e25f8995e..91394ad05121 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -66,6 +66,7 @@
> #include <linux/mfd/syscon.h>
> #include <linux/regmap.h>
> #include <soc/imx/cpuidle.h>
> +#include <net/pkt_sched.h>
Some alphabetic order? At least for new files :D
> #include <linux/filter.h>
> #include <linux/bpf.h>
>
> @@ -1023,6 +1024,174 @@ static void fec_enet_reset_skb(struct net_device *ndev)
> }
> }
>
> +static u32 fec_enet_get_idle_slope(u8 bw)
Just use `u32`, usually there's no reason to use types shorter than
integer on the stack.
> +{
> + int msb, power;
> + u32 idle_slope;
> +
> + if (bw >= 100)
> + return 0;
> +
> + /* Convert bw to hardware idle slope */
> + idle_slope = (512 * bw) / (100 - bw);
> +
Redundant newline. Also first pair of braces is optional and up to you.
> + if (idle_slope >= 128) {
> + /* For values greater than or equal to 128, idle_slope
> + * rounded to the nearest multiple of 128.
> + */
But you can just do
idle_slope = min(idle_slope, 128U);
and still use the "standard" path below, without the conditional return?
Or even combine it with the code above:
idle_slope = min((512 * bw) / (100 - bw), 128U);
> + idle_slope = DIV_ROUND_CLOSEST(idle_slope, 128U) * 128U;
> +
> + return idle_slope;
> + }
> +
> + /* For values less than 128, idle_slope is rounded to
> + * nearst power of 2.
Typo, 'nearest'.
> + */
> + if (idle_slope <= 1)
> + return 1;
> +
> + msb = __fls(idle_slope);
I think `fls() - 1` is preferred over `__fls()` since it may go UB. And
some checks wouldn't hurt.
> + power = BIT(msb);
Oh okay. Then rounddown_pow_of_two() is what you're looking for.
power = rounddown_pow_of_two(idle_slope);
Or even just use one variable, @idle_slope.
> + idle_slope = DIV_ROUND_CLOSEST(idle_slope, power) * power;
> +
> + return idle_slope;
You can return DIV_ROUND_ ... right away, without assignning first.
Also, I'm thinking of that this might be a generic helper. We have
roundup() and rounddown(), this could be something like "round_closest()"?
> +}
> +
> +static void fec_enet_set_cbs_idle_slope(struct fec_enet_private *fep)
> +{
> + u32 bw, val, idle_slope;
> + int speed = fep->speed;
> + int idle_slope_sum = 0;
> + int i;
Can any of them be negative?
> +
> + if (!speed)
> + return;
> +
> + for (i = 1; i < FEC_ENET_MAX_TX_QS; i++) {
So you don't use the zeroth array elements at all? Why having them then?
> + int port_tx_rate;
(same for type)
> +
> + /* As defined in IEEE 802.1Q-2014 Section 8.6.8.2 item:
> + * sendslope = idleslope - port_tx_rate
> + * So we need to check whether port_tx_rate is equal to
> + * the current link rate.
[...]
> + for (i = 1; i < FEC_ENET_MAX_TX_QS; i++) {
> + bw = fep->cbs.idleslope[i] / (speed * 10);
> + idle_slope = fec_enet_get_idle_slope(bw);
> +
> + val = readl(fep->hwp + FEC_DMA_CFG(i));
> + val &= ~IDLE_SLOPE_MASK;
> + val |= idle_slope & IDLE_SLOPE_MASK;
u32_replace_bits() will do it for you.
> + writel(val, fep->hwp + FEC_DMA_CFG(i));
> + }
> +
> + /* Enable Credit-based shaper. */
> + val = readl(fep->hwp + FEC_QOS_SCHEME);
> + val &= ~FEC_QOS_TX_SHEME_MASK;
> + val |= CREDIT_BASED_SCHEME;
(same)
> + writel(val, fep->hwp + FEC_QOS_SCHEME);
> +}
> +
> +static int fec_enet_setup_tc_cbs(struct net_device *ndev, void *type_data)
> +{
> + struct fec_enet_private *fep = netdev_priv(ndev);
> + struct tc_cbs_qopt_offload *cbs = type_data;
> + int queue = cbs->queue;
> + int speed = fep->speed;
> + int queue2;
(types)
> +
> + if (!(fep->quirks & FEC_QUIRK_HAS_AVB))
> + return -EOPNOTSUPP;
> +
> + /* Queue 1 for Class A, Queue 2 for Class B, so the ENET must
> + * have three queues.
> + */
> + if (fep->num_tx_queues != FEC_ENET_MAX_TX_QS)
> + return -EOPNOTSUPP;
> +
> + if (!speed) {
> + netdev_err(ndev, "Link speed is 0!\n");
??? Is this possible? If so, why is it checked only here and why can it
be possible?
> + return -ECANCELED;
(already mentioned in other review)
> + }
> +
> + /* Queue 0 is not AVB capable */
> + if (queue <= 0 || queue >= fep->num_tx_queues) {
Is `< 0` possible? I realize it's `s32`, just curious.
> + netdev_err(ndev, "The queue: %d is invalid!\n", queue);
Maybe less emotions? There's no point in having `!` at the end of every
error.
> + return -EINVAL;
> + }
> +
> + if (!cbs->enable) {
> + u32 val;
> +
> + val = readl(fep->hwp + FEC_QOS_SCHEME);
> + val &= ~FEC_QOS_TX_SHEME_MASK;
> + val |= ROUND_ROBIN_SCHEME;
(u32_replace_bits())
> + writel(val, fep->hwp + FEC_QOS_SCHEME);
> +
> + memset(&fep->cbs, 0, sizeof(fep->cbs));
> +
> + return 0;
> + }
> +
> + if (cbs->idleslope - cbs->sendslope != speed * 1000 ||
> + cbs->idleslope <= 0 || cbs->sendslope >= 0)
So you check slopes here, why check them above then?
> + return -EINVAL;
> +
> + /* Another AVB queue */
> + queue2 = (queue == 1) ? 2 : 1;
Braces are unneeded.
> + if (cbs->idleslope + fep->cbs.idleslope[queue2] > speed * 1000) {
> + netdev_err(ndev,
> + "The sum of all idle slope can't exceed link speed!\n");
> + return -EINVAL;
> + }
[...]
Thanks,
Olek