RE: [PATCH net-next 3/5] net/sched: taprio: add netlink reporting for offload statistics counters

From: Zulkifli, Muhammad Husaini
Date: Wed May 31 2023 - 06:55:05 EST


Hi Vladimir,

> -----Original Message-----
> From: Vladimir Oltean <vladimir.oltean@xxxxxxx>
> Sent: Tuesday, 30 May, 2023 5:20 PM
> To: netdev@xxxxxxxxxxxxxxx
> Cc: David S. Miller <davem@xxxxxxxxxxxxx>; Eric Dumazet
> <edumazet@xxxxxxxxxx>; Jakub Kicinski <kuba@xxxxxxxxxx>; Paolo Abeni
> <pabeni@xxxxxxxxxx>; Hadi Salim, Jamal <jhs@xxxxxxxxxxxx>; Cong Wang
> <xiyou.wangcong@xxxxxxxxx>; Jiri Pirko <jiri@xxxxxxxxxxx>; Gomes, Vinicius
> <vinicius.gomes@xxxxxxxxx>; Kurt Kanzenbach <kurt@xxxxxxxxxxxxx>;
> Gerhard Engleder <gerhard@xxxxxxxxxxxxxxxxxxxxx>; Nambiar, Amritha
> <amritha.nambiar@xxxxxxxxx>; Ferenc Fejes <ferenc.fejes@xxxxxxxxxxxx>;
> Xiaoliang Yang <xiaoliang.yang_1@xxxxxxx>; Roger Quadros
> <rogerq@xxxxxxxxxx>; Pranavi Somisetty <pranavi.somisetty@xxxxxxx>;
> Harini Katakam <harini.katakam@xxxxxxx>; Giuseppe Cavallaro
> <peppe.cavallaro@xxxxxx>; Alexandre Torgue
> <alexandre.torgue@xxxxxxxxxxx>; Sit, Michael Wei Hong
> <michael.wei.hong.sit@xxxxxxxxx>; Ismail, Mohammad Athari
> <mohammad.athari.ismail@xxxxxxxxx>; Oleksij Rempel <linux@rempel-
> privat.de>; Keller, Jacob E <jacob.e.keller@xxxxxxxxx>; linux-
> kernel@xxxxxxxxxxxxxxx; Andrew Lunn <andrew@xxxxxxx>; Florian Fainelli
> <f.fainelli@xxxxxxxxx>; Claudiu Manoil <claudiu.manoil@xxxxxxx>;
> Alexandre Belloni <alexandre.belloni@xxxxxxxxxxx>;
> UNGLinuxDriver@xxxxxxxxxxxxx; Brandeburg, Jesse
> <jesse.brandeburg@xxxxxxxxx>; Nguyen, Anthony L
> <anthony.l.nguyen@xxxxxxxxx>; Horatiu Vultur
> <horatiu.vultur@xxxxxxxxxxxxx>; Jose Abreu <joabreu@xxxxxxxxxxxx>;
> Maxime Coquelin <mcoquelin.stm32@xxxxxxxxx>; intel-wired-
> lan@xxxxxxxxxxxxxxxx; Zulkifli, Muhammad Husaini
> <muhammad.husaini.zulkifli@xxxxxxxxx>
> Subject: [PATCH net-next 3/5] net/sched: taprio: add netlink reporting for
> offload statistics counters
>
> Offloading drivers may report some additional statistics counters, some of
> them even suggested by 802.1Q, like TransmissionOverrun.
>
> In my opinion we don't have to limit ourselves to reporting counters only
> globally to the Qdisc/interface, especially if the device has more detailed
> reporting (per traffic class), since the more detailed info is valuable for
> debugging and can help identifying who is exceeding its time slot.
>
> But on the other hand, some devices may not be able to report both per TC
> and global stats.
>
> So we end up reporting both ways, and use the good old ethtool_put_stat()
> strategy to determine which statistics are supported by this NIC.
> Statistics which aren't set are simply not reported to netlink. For this reason,
> we need something dynamic (a nlattr nest) to be reported through
> TCA_STATS_APP, and not something daft like the fixed-size and inextensible
> struct tc_codel_xstats. A good model for xstats which are a nlattr nest rather
> than a fixed struct seems to be cake.
>
> # Global stats
> $ tc -s qdisc show dev eth0 root
> # Per-tc stats
> $ tc -s class show dev eth0
>
> Signed-off-by: Vladimir Oltean <vladimir.oltean@xxxxxxx>

Tested-by: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@xxxxxxxxx>

Thanks for the patch :)
I applied my changes on top of your patch series, and it worked for both
TAPRIO_CMD_STATS and TAPRIO_CMD_TC_STATS. Awesome!

> ---
> include/net/pkt_sched.h | 47 ++++++++++++++++----
> include/uapi/linux/pkt_sched.h | 10 +++++
> net/sched/sch_taprio.c | 78 +++++++++++++++++++++++++++++++++-
> 3 files changed, 126 insertions(+), 9 deletions(-)
>
> diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index
> f5fb11da357b..530d33adec88 100644
> --- a/include/net/pkt_sched.h
> +++ b/include/net/pkt_sched.h
> @@ -188,6 +188,27 @@ struct tc_taprio_caps { enum tc_taprio_qopt_cmd {
> TAPRIO_CMD_REPLACE,
> TAPRIO_CMD_DESTROY,
> + TAPRIO_CMD_STATS,
> + TAPRIO_CMD_TC_STATS,
> +};
> +
> +/**
> + * struct tc_taprio_qopt_stats - IEEE 802.1Qbv statistics
> + * @window_drops: Frames that were dropped because they were too large
> to be
> + * transmitted in any of the allotted time windows (open gates) for their
> + * traffic class.
> + * @tx_overruns: Frames still being transmitted by the MAC after the
> + * transmission gate associated with their traffic class has closed.
> + * Equivalent to `12.29.1.1.2 TransmissionOverrun` from 802.1Q-2018.
> + */
> +struct tc_taprio_qopt_stats {
> + u64 window_drops;
> + u64 tx_overruns;
> +};
> +
> +struct tc_taprio_qopt_tc_stats {
> + int tc;
> + struct tc_taprio_qopt_stats stats;
> };
>
> struct tc_taprio_sched_entry {
> @@ -199,16 +220,26 @@ struct tc_taprio_sched_entry { };
>
> struct tc_taprio_qopt_offload {
> - struct tc_mqprio_qopt_offload mqprio;
> - struct netlink_ext_ack *extack;
> enum tc_taprio_qopt_cmd cmd;
> - ktime_t base_time;
> - u64 cycle_time;
> - u64 cycle_time_extension;
> - u32 max_sdu[TC_MAX_QUEUE];
>
> - size_t num_entries;
> - struct tc_taprio_sched_entry entries[];
> + union {
> + /* TAPRIO_CMD_STATS */
> + struct tc_taprio_qopt_stats stats;
> + /* TAPRIO_CMD_TC_STATS */
> + struct tc_taprio_qopt_tc_stats tc_stats;
> + /* TAPRIO_CMD_REPLACE */
> + struct {
> + struct tc_mqprio_qopt_offload mqprio;
> + struct netlink_ext_ack *extack;
> + ktime_t base_time;
> + u64 cycle_time;
> + u64 cycle_time_extension;
> + u32 max_sdu[TC_MAX_QUEUE];
> +
> + size_t num_entries;
> + struct tc_taprio_sched_entry entries[];
> + };
> + };
> };
>
> #if IS_ENABLED(CONFIG_NET_SCH_TAPRIO)
> diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
> index 51a7addc56c6..00f6ff0aff1f 100644
> --- a/include/uapi/linux/pkt_sched.h
> +++ b/include/uapi/linux/pkt_sched.h
> @@ -1259,6 +1259,16 @@ enum {
> TCA_TAPRIO_TC_ENTRY_MAX = (__TCA_TAPRIO_TC_ENTRY_CNT - 1)
> };
>
> +enum {
> + TCA_TAPRIO_OFFLOAD_STATS_PAD = 1, /* u64 */
> + TCA_TAPRIO_OFFLOAD_STATS_WINDOW_DROPS, /* u64 */
> + TCA_TAPRIO_OFFLOAD_STATS_TX_OVERRUNS, /* u64 */
> +
> + /* add new constants above here */
> + __TCA_TAPRIO_OFFLOAD_STATS_CNT,
> + TCA_TAPRIO_OFFLOAD_STATS_MAX =
> (__TCA_TAPRIO_OFFLOAD_STATS_CNT - 1) };
> +
> enum {
> TCA_TAPRIO_ATTR_UNSPEC,
> TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */ diff --git
> a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index
> 06bf4c6355a5..3c4c2c334878 100644
> --- a/net/sched/sch_taprio.c
> +++ b/net/sched/sch_taprio.c
> @@ -27,6 +27,8 @@
> #include <net/sock.h>
> #include <net/tcp.h>
>
> +#define TAPRIO_STAT_NOT_SET (~0ULL)
> +
> #include "sch_mqprio_lib.h"
>
> static LIST_HEAD(taprio_list);
> @@ -2289,6 +2291,72 @@ static int taprio_dump_tc_entries(struct sk_buff
> *skb,
> return -EMSGSIZE;
> }
>
> +static int taprio_put_stat(struct sk_buff *skb, u64 val, u16 attrtype)
> +{
> + if (val == TAPRIO_STAT_NOT_SET)
> + return 0;
> + if (nla_put_u64_64bit(skb, attrtype, val,
> TCA_TAPRIO_OFFLOAD_STATS_PAD))
> + return -EMSGSIZE;
> + return 0;
> +}
> +
> +static int taprio_dump_xstats(struct Qdisc *sch, struct gnet_dump *d,
> + struct tc_taprio_qopt_offload *offload,
> + struct tc_taprio_qopt_stats *stats) {
> + struct net_device *dev = qdisc_dev(sch);
> + const struct net_device_ops *ops;
> + struct sk_buff *skb = d->skb;
> + struct nlattr *xstats;
> + int err;
> +
> + ops = qdisc_dev(sch)->netdev_ops;
> +
> + /* FIXME I could use qdisc_offload_dump_helper(), but that messes
> + * with sch->flags depending on whether the device reports taprio
> + * stats, and I'm not sure whether that's a good idea, considering
> + * that stats are optional to the offload itself
> + */
> + if (!ops->ndo_setup_tc)
> + return 0;
> +
> + memset(stats, 0xff, sizeof(*stats));
> +
> + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
> + if (err == -EOPNOTSUPP)
> + return 0;
> + if (err)
> + return err;
> +
> + xstats = nla_nest_start(skb, TCA_STATS_APP);
> + if (!xstats)
> + goto err;
> +
> + if (taprio_put_stat(skb, stats->window_drops,
> + TCA_TAPRIO_OFFLOAD_STATS_WINDOW_DROPS)
> ||
> + taprio_put_stat(skb, stats->tx_overruns,
> + TCA_TAPRIO_OFFLOAD_STATS_TX_OVERRUNS))
> + goto err_cancel;
> +
> + nla_nest_end(skb, xstats);
> +
> + return 0;
> +
> +err_cancel:
> + nla_nest_cancel(skb, xstats);
> +err:
> + return -EMSGSIZE;
> +}
> +
> +static int taprio_dump_stats(struct Qdisc *sch, struct gnet_dump *d) {
> + struct tc_taprio_qopt_offload offload = {
> + .cmd = TAPRIO_CMD_STATS,
> + };
> +
> + return taprio_dump_xstats(sch, d, &offload, &offload.stats); }
> +
> static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) {
> struct taprio_sched *q = qdisc_priv(sch); @@ -2389,11 +2457,18 @@
> static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, {
> struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
> struct Qdisc *child = dev_queue->qdisc_sleeping;
> + struct tc_taprio_qopt_offload offload = {
> + .cmd = TAPRIO_CMD_TC_STATS,
> + .tc_stats = {
> + .tc = cl - 1,
> + },
> + };
>
> if (gnet_stats_copy_basic(d, NULL, &child->bstats, true) < 0 ||
> qdisc_qstats_copy(d, child) < 0)
> return -1;
> - return 0;
> +
> + return taprio_dump_xstats(sch, d, &offload, &offload.tc_stats.stats);
> }
>
> static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) @@ -
> 2440,6 +2515,7 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly
> = {
> .dequeue = taprio_dequeue,
> .enqueue = taprio_enqueue,
> .dump = taprio_dump,
> + .dump_stats = taprio_dump_stats,
> .owner = THIS_MODULE,
> };
>
> --
> 2.34.1