Re: [PATCH net-next] net: fec: add CBS offload support

From: Simon Horman
Date: Thu Feb 09 2023 - 11:14:34 EST


On Thu, Feb 09, 2023 at 05:24:22PM +0800, wei.fang@xxxxxxx wrote:
> From: Wei Fang <wei.fang@xxxxxxx>
>
> The FEC hardware supports the Credit-based shaper (CBS) which control
> the bandwidth distribution between normal traffic and time-sensitive
> traffic with respect to the total link bandwidth available.
> But notice that the bandwidth allocation of hardware is restricted to
> certain values. Below is the equation which is used to calculate the
> BW (bandwidth) fraction for per class:
> BW fraction = 1 / (1 + 512 / idle_slope)
>
> For values of idle_slope less than 128, idle_slope = 2 ^ n, when n =
> 0,1,2,...,6. For values equal to or greater than 128, idle_slope =
> 128 * m, where m = 1,2,3,...,12.
> Example 1. idle_slope = 64, therefore BW fraction = 0.111.
> Example 2. idle_slope = 128, therefore BW fraction = 0.200.
>
> Here is an example command to set 200Mbps bandwidth on 1000Mbps port
> for TC 2 and 111Mbps for TC 3.
> tc qdisc add dev eth0 parent root handle 100 mqprio num_tc 3 map \
> 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 queues 1@0 1@1 1@2 hw 0
> tc qdisc replace dev eth0 parent 100:2 cbs idleslope 200000 \
> sendslope -800000 hicredit 153 locredit -1389 offload 1
> tc qdisc replace dev eth0 parent 100:3 cbs idleslope 111000 \
> sendslope -889000 hicredit 90 locredit -892 offload 1
>
> Signed-off-by: Wei Fang <wei.fang@xxxxxxx>
> ---
> drivers/net/ethernet/freescale/fec.h | 4 +
> drivers/net/ethernet/freescale/fec_main.c | 106 ++++++++++++++++++++++
> 2 files changed, 110 insertions(+)
>
> diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
> index 5ba1e0d71c68..ad5f968aa086 100644
> --- a/drivers/net/ethernet/freescale/fec.h
> +++ b/drivers/net/ethernet/freescale/fec.h
> @@ -340,6 +340,10 @@ struct bufdesc_ex {
> #define RCMR_CMP(X) (((X) == 1) ? RCMR_CMP_1 : RCMR_CMP_2)
> #define FEC_TX_BD_FTYPE(X) (((X) & 0xf) << 20)
>
> +#define FEC_QOS_TX_SHEME_MASK GENMASK(2, 0)
> +#define CREDIT_BASED_SCHEME 0
> +#define ROUND_ROBIN_SCHEME 1
> +
> /* The number of Tx and Rx buffers. These are allocated from the page
> * pool. The code may assume these are power of two, so it it best
> * to keep them that size.
> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
> index c73e25f8995e..3bb3a071fa0c 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -66,6 +66,7 @@
> #include <linux/mfd/syscon.h>
> #include <linux/regmap.h>
> #include <soc/imx/cpuidle.h>
> +#include <net/pkt_sched.h>
> #include <linux/filter.h>
> #include <linux/bpf.h>
>
> @@ -3232,6 +3233,110 @@ static int fec_enet_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
> return phy_mii_ioctl(phydev, rq, cmd);
> }
>
> +static u32 fec_enet_get_idle_slope(u8 bw)
> +{
> + u32 idle_slope, quotient, msb;
> +
> + /* Convert bw to hardware idle slope */
> + idle_slope = (512 * bw) / (100 - bw);
> +
> + if (idle_slope >= 128) {
> + /* For values equal to or greater than 128, idle_slope = 128 * m,
> + * where m = 1, 2, 3, ...12. Here we use the rounding method.
> + */

Perhaps the following would be clearer?

For values greater than or equal to 128,
idle_slope is rounded to the nearest multiple of 128.

> + quotient = idle_slope / 128;
> + if (idle_slope >= quotient * 128 + 64)
> + idle_slope = 128 * (quotient + 1);
> + else
> + idle_slope = 128 * quotient;

Maybe there is a helper that does this, but if
not, perhaps:

idle_slope = DIV_ROUND_CLOSEST(idle_slope, 128U) * 128U;


> +
> + goto end;

Maybe return here

> + }

Or an else here is nicer?

> +
> + /* For values less than 128, idle_slope = 2 ^ n, where

Perhaps the following would be clearer?

For values less than 128, idle_slope is rounded
to the nearest power of 2.

> + * n = 0, 1, 2, ...6. Here we use the rounding method.

n is 7 for input idle_slope around 128 (2^7)

> + * So the minimum of idle_slope is 1.
> + */
> + msb = fls(idle_slope);
> +
> + if (msb == 0 || msb == 1) {
> + idle_slope = 1;
> + goto end;
> + }

nit: maybe this is nicer

if (msb <= 1)
return 1;

> +
> + msb -= 1;
> + if (idle_slope >= (1 << msb) + (1 << (msb - 1)))
> + idle_slope = 1 << (msb + 1);
> + else
> + idle_slope = 1 << msb;

In the same vein as the suggestion for the >= 128 case, perhaps:

u32 d;

d = BIT(fls(idle_slope));
idle_slope = DIV_ROUND_CLOSEST(idle_slope, d) * d;

> +
> +end:
> + return idle_slope;
> +}
> +
> +static int fec_enet_setup_tc_cbs(struct net_device *ndev, void *type_data)
> +{
> + struct fec_enet_private *fep = netdev_priv(ndev);
> + struct tc_cbs_qopt_offload *cbs = type_data;
> + int queue = cbs->queue;

nit: extra space after '='

> + u32 speed = fep->speed;
> + u32 val, idle_slope;
> + u8 bw;
> +
> + if (!(fep->quirks & FEC_QUIRK_HAS_AVB))
> + return -EOPNOTSUPP;
> +
> + /* Queue 1 for Class A, Queue 2 for Class B, so the ENET must has

nit: s/has/have/

> + * three queues.
> + */
> + if (fep->num_tx_queues != FEC_ENET_MAX_TX_QS)
> + return -EOPNOTSUPP;
> +
> + /* Queue 0 is not AVB capable */
> + if (queue <= 0 || queue >= fep->num_tx_queues)
> + return -EINVAL;
> +
> + val = readl(fep->hwp + FEC_QOS_SCHEME);
> + val &= ~FEC_QOS_TX_SHEME_MASK;
> + if (!cbs->enable) {
> + val |= ROUND_ROBIN_SCHEME;
> + writel(val, fep->hwp + FEC_QOS_SCHEME);
> +
> + return 0;
> + }
> +
> + val |= CREDIT_BASED_SCHEME;
> + writel(val, fep->hwp + FEC_QOS_SCHEME);
> +
> + /* cbs->idleslope is in kilobits per second. speed is the port rate
> + * in megabits per second. So bandwidth ratio bw = (idleslope /
> + * (speed * 1000)) * 100, the unit is percentage.
> + */

suggestion:

/* cbs->idleslope is in kilobits per second.
* Speed is the port rate in megabits per second.
* So bandwidth the ratio, bw, is (idleslope / (speed * 1000)) * 100.
* The unit of bw is a percentage.
*/

> + bw = cbs->idleslope / (speed * 10UL);
> + /* bw% can not >= 100% */
> + if (bw >= 100)
> + return -EINVAL;

nit: maybe the above calculation and check fits better inside
fec_enet_get_idle_slope()

> + idle_slope = fec_enet_get_idle_slope(bw);
> +
> + val = readl(fep->hwp + FEC_DMA_CFG(queue));
> + val &= ~IDLE_SLOPE_MASK;
> + val |= idle_slope & IDLE_SLOPE_MASK;
> + writel(val, fep->hwp + FEC_DMA_CFG(queue));
> +
> + return 0;
> +}
> +
> +static int fec_enet_setup_tc(struct net_device *ndev, enum tc_setup_type type,
> + void *type_data)
> +{
> + switch (type) {
> + case TC_SETUP_QDISC_CBS:
> + return fec_enet_setup_tc_cbs(ndev, type_data);
> + default:
> + return -EOPNOTSUPP;
> + }
> +}
> +
> static void fec_enet_free_buffers(struct net_device *ndev)
> {
> struct fec_enet_private *fep = netdev_priv(ndev);
> @@ -3882,6 +3987,7 @@ static const struct net_device_ops fec_netdev_ops = {
> .ndo_tx_timeout = fec_timeout,
> .ndo_set_mac_address = fec_set_mac_address,
> .ndo_eth_ioctl = fec_enet_ioctl,
> + .ndo_setup_tc = fec_enet_setup_tc,
> #ifdef CONFIG_NET_POLL_CONTROLLER
> .ndo_poll_controller = fec_poll_controller,
> #endif
> --
> 2.25.1
>