Re: [PATCH net-next v3 1/6] net: ethernet: ti: am65-cpsw: Introduce multi queue Rx
From: Roger Quadros
Date: Sat Jul 27 2024 - 02:27:49 EST
On 24/07/2024 00:10, Brett Creeley wrote:
>
>
> On 7/3/2024 6:51 AM, Roger Quadros wrote:
>> Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.
>>
>>
>> am65-cpsw can support up to 8 queues at Rx.
>> Use a macro AM65_CPSW_MAX_RX_QUEUES to indicate that.
>> As there is only one DMA channel for RX traffic, the
>> 8 queues come as 8 flows in that channel.
>>
>> By default, we will start with 1 flow as defined by the
>> macro AM65_CPSW_DEFAULT_RX_CHN_FLOWS.
>>
>> User can change the number of flows by ethtool like so
>> 'ethtool -L ethx rx <N>'
>>
>> All traffic will still come on flow 0. To get traffic on
>> different flows the Classifiers will need to be set up.
>>
>> Signed-off-by: Roger Quadros <rogerq@xxxxxxxxxx>
>> Reviewed-by: Simon Horman <horms@xxxxxxxxxx>
>> ---
>> Changelog:
>> v3:
>> - style fixes: reverse xmas tree and checkpatch.pl --max-line-length=80
>> - typo fix: Classifer -> Classifier
>> - added Reviewed-by Simon Horman
>> ---
>> drivers/net/ethernet/ti/am65-cpsw-ethtool.c | 62 +++--
>> drivers/net/ethernet/ti/am65-cpsw-nuss.c | 367 ++++++++++++++++------------
>> drivers/net/ethernet/ti/am65-cpsw-nuss.h | 36 +--
>> 3 files changed, 284 insertions(+), 181 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c
>> index a1d0935d1ebe..01e3967852e0 100644
>> --- a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c
>> +++ b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c
>> @@ -429,7 +429,7 @@ static void am65_cpsw_get_channels(struct net_device *ndev,
>>
>> ch->max_rx = AM65_CPSW_MAX_RX_QUEUES;
>> ch->max_tx = AM65_CPSW_MAX_TX_QUEUES;
>> - ch->rx_count = AM65_CPSW_MAX_RX_QUEUES;
>> + ch->rx_count = common->rx_ch_num_flows;
>> ch->tx_count = common->tx_ch_num;
>> }
>>
>> @@ -448,8 +448,10 @@ static int am65_cpsw_set_channels(struct net_device *ndev,
>> return -EBUSY;
>>
>> am65_cpsw_nuss_remove_tx_chns(common);
>> + am65_cpsw_nuss_remove_rx_chns(common);
>>
>> - return am65_cpsw_nuss_update_tx_chns(common, chs->tx_count);
>> + return am65_cpsw_nuss_update_tx_rx_chns(common, chs->tx_count,
>> + chs->rx_count);
>> }
>>
>> static void
>> @@ -920,11 +922,13 @@ static int am65_cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coales
>> struct netlink_ext_ack *extack)
>> {
>> struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
>> + struct am65_cpsw_rx_flow *rx_flow;
>> struct am65_cpsw_tx_chn *tx_chn;
>>
>> tx_chn = &common->tx_chns[0];
>> + rx_flow = &common->rx_chns.flows[0];
>>
>> - coal->rx_coalesce_usecs = common->rx_pace_timeout / 1000;
>> + coal->rx_coalesce_usecs = rx_flow->rx_pace_timeout / 1000;
>> coal->tx_coalesce_usecs = tx_chn->tx_pace_timeout / 1000;
>>
>> return 0;
>> @@ -934,14 +938,26 @@ static int am65_cpsw_get_per_queue_coalesce(struct net_device *ndev, u32 queue,
>> struct ethtool_coalesce *coal)
>> {
>> struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
>> + struct am65_cpsw_rx_flow *rx_flow;
>> struct am65_cpsw_tx_chn *tx_chn;
>>
>> - if (queue >= AM65_CPSW_MAX_TX_QUEUES)
>> + if (queue >= AM65_CPSW_MAX_TX_QUEUES &&
>> + queue >= AM65_CPSW_MAX_RX_QUEUES)
>> return -EINVAL;
>>
>> - tx_chn = &common->tx_chns[queue];
>> + if (queue < AM65_CPSW_MAX_TX_QUEUES) {
>> + tx_chn = &common->tx_chns[queue];
>> + coal->tx_coalesce_usecs = tx_chn->tx_pace_timeout / 1000;
>> + } else {
>> + coal->tx_coalesce_usecs = ~0;
>> + }
>>
>> - coal->tx_coalesce_usecs = tx_chn->tx_pace_timeout / 1000;
>> + if (queue < AM65_CPSW_MAX_RX_QUEUES) {
>> + rx_flow = &common->rx_chns.flows[queue];
>> + coal->rx_coalesce_usecs = rx_flow->rx_pace_timeout / 1000;
>> + } else {
>> + coal->rx_coalesce_usecs = ~0;
>> + }
>
> Minor nit, but after removing the dead code below the check for queue being greater than max values, I think am65_cpsw_get_coalesce() and am65_get_per_queue_coalesce() are identical except the "u32 queue" argument.
>
> I think you could do something like the following:
>
> static int am65_cpsw_get_per_queue_coalesce(struct net_device *ndev,
> struct ethtool_coalesce *coal,
> struct netlink_ext_ack *extack)
> {
> return __am65_cpsw_get_coalesce(ndev, coal, 0);
> }
>
> static int am65_cpsw_get_coalesce(struct net_device *ndev, u32 queue,
> struct ethtool_coalesce *coal,
> struct netlink_ext_ack *extack,
> u32 )
> {
> return __am65_cpsw_get_coalesce(ndev, coal, queue);
> }
>
Sure, this is much nicer.
>>
>> return 0;
>> }
>> @@ -951,9 +967,11 @@ static int am65_cpsw_set_coalesce(struct net_device *ndev, struct ethtool_coales
>> struct netlink_ext_ack *extack)
>> {
>> struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
>> + struct am65_cpsw_rx_flow *rx_flow;
>> struct am65_cpsw_tx_chn *tx_chn;
>>
>> tx_chn = &common->tx_chns[0];
>> + rx_flow = &common->rx_chns.flows[0];
>>
>> if (coal->rx_coalesce_usecs && coal->rx_coalesce_usecs < 20)
>> return -EINVAL;
>> @@ -961,7 +979,7 @@ static int am65_cpsw_set_coalesce(struct net_device *ndev, struct ethtool_coales
>> if (coal->tx_coalesce_usecs && coal->tx_coalesce_usecs < 20)
>> return -EINVAL;
>
> Why does this return -EINVAL here, but am65_cpsw_set_per_queue_coalesce() prints a dev_info() and then set the value to 20?
>
> Would it better to have consistent behavior? Maybe I'm missing some context or reasoning here?
Consistent behaviour is better.
>
>>
>> - common->rx_pace_timeout = coal->rx_coalesce_usecs * 1000;
>> + rx_flow->rx_pace_timeout = coal->rx_coalesce_usecs * 1000;
>> tx_chn->tx_pace_timeout = coal->tx_coalesce_usecs * 1000;
>>
>> return 0;
>> @@ -971,20 +989,36 @@ static int am65_cpsw_set_per_queue_coalesce(struct net_device *ndev, u32 queue,
>> struct ethtool_coalesce *coal)
>> {
>> struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
>> + struct am65_cpsw_rx_flow *rx_flow;
>> struct am65_cpsw_tx_chn *tx_chn;
>>
>> - if (queue >= AM65_CPSW_MAX_TX_QUEUES)
>> + if (queue >= AM65_CPSW_MAX_TX_QUEUES &&
>> + queue >= AM65_CPSW_MAX_RX_QUEUES)
>> return -EINVAL;
>>
>> - tx_chn = &common->tx_chns[queue];
>> + if (queue < AM65_CPSW_MAX_TX_QUEUES) {
>> + tx_chn = &common->tx_chns[queue];
>> +
>> + if (coal->tx_coalesce_usecs && coal->tx_coalesce_usecs < 20) {
>> + dev_info(common->dev, "defaulting to min value of 20us for tx-usecs for tx-%u\n",
>> + queue);
>> + coal->tx_coalesce_usecs = 20;
>> + }
>>
>> - if (coal->tx_coalesce_usecs && coal->tx_coalesce_usecs < 20) {
>> - dev_info(common->dev, "defaulting to min value of 20us for tx-usecs for tx-%u\n",
>> - queue);
>> - coal->tx_coalesce_usecs = 20;
>> + tx_chn->tx_pace_timeout = coal->tx_coalesce_usecs * 1000;
>> }
>>
>> - tx_chn->tx_pace_timeout = coal->tx_coalesce_usecs * 1000;
>> + if (queue < AM65_CPSW_MAX_RX_QUEUES) {
>> + rx_flow = &common->rx_chns.flows[queue];
>> +
>> + if (coal->rx_coalesce_usecs && coal->rx_coalesce_usecs < 20) {
>> + dev_info(common->dev, "defaulting to min value of 20us for rx-usecs for rx-%u\n",
>> + queue);
>
> Would it make more sense to just return -EINVAL here similar to the standard "set_coalesce"?
Yes, I'll do that in next spin.
>
>> + coal->rx_coalesce_usecs = 20;
>> + }
>> +
>> + rx_flow->rx_pace_timeout = coal->rx_coalesce_usecs * 1000;
>> + }
>>
>> return 0;
>> }
>
> I think my comment to the "get" and "get_per_queue" versions of these functions also applies here, but only if the behavior of returning -EINVAL or setting a value for the user is the same between the "set" and "set_per_queue".
Thanks for the review!
>
> Thanks,
>
> Brett
>
> <snip>
>
--
cheers,
-roger