Re: [PATCH net-next 2/3] net: hns3: add support for TX push mode

From: Will Deacon
Date: Tue Jun 22 2021 - 08:16:19 EST


On Tue, Jun 22, 2021 at 07:11:10PM +0800, Guangbin Huang wrote:
> From: Huazhong Tan <tanhuazhong@xxxxxxxxxx>
>
> For the device that supports the TX push capability, the BD can
> be directly copied to the device memory. However, due to hardware
> restrictions, the push mode can be used only when there are no
> more than two BDs, otherwise, the doorbell mode based on device
> memory is used.
>
> Signed-off-by: Huazhong Tan <tanhuazhong@xxxxxxxxxx>
> Signed-off-by: Yufeng Mo <moyufeng@xxxxxxxxxx>
> ---
> drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 +
> drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 83 ++++++++++++++++++++--
> drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 6 ++
> drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 2 +
> .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c | 2 +
> .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 11 ++-
> .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 8 +++
> .../ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c | 2 +
> .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 11 ++-
> .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 8 +++
> 10 files changed, 126 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
> index 0b202f4def83..3979d5d2e842 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
> +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
> @@ -163,6 +163,7 @@ struct hnae3_handle;
>
> struct hnae3_queue {
> void __iomem *io_base;
> + void __iomem *mem_base;
> struct hnae3_ae_algo *ae_algo;
> struct hnae3_handle *handle;
> int tqp_index; /* index in a handle */
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> index cdb5f14fb6bc..8649bd8e1b57 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> @@ -2002,9 +2002,77 @@ static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring,
> return bd_num;
> }
>
> +static void hns3_tx_push_bd(struct hns3_enet_ring *ring, int num)
> +{
> +#define HNS3_BYTES_PER_64BIT 8
> +
> + struct hns3_desc desc[HNS3_MAX_PUSH_BD_NUM] = {};
> + int offset = 0;
> +
> + /* make sure everything is visible to device before
> + * excuting tx push or updating doorbell
> + */
> + dma_wmb();
> +
> + do {
> + int idx = (ring->next_to_use - num + ring->desc_num) %
> + ring->desc_num;
> +
> + u64_stats_update_begin(&ring->syncp);
> + ring->stats.tx_push++;
> + u64_stats_update_end(&ring->syncp);
> + memcpy(&desc[offset], &ring->desc[idx],
> + sizeof(struct hns3_desc));
> + offset++;
> + } while (--num);
> +
> + __iowrite64_copy(ring->tqp->mem_base, desc,
> + (sizeof(struct hns3_desc) * HNS3_MAX_PUSH_BD_NUM) /
> + HNS3_BYTES_PER_64BIT);
> +
> +#if defined(CONFIG_ARM64)
> + dgh();
> +#endif

It looks a bit weird putting this at the end of the function, given that
it's supposed to do something to a pair of accesses. Please can you explain
what it's doing, and also provide some numbers to show that it's worthwhile
(given that it's a performance hint not a correctness thing afaict).

> +}
> +
> +static void hns3_tx_mem_doorbell(struct hns3_enet_ring *ring)
> +{
> +#define HNS3_MEM_DOORBELL_OFFSET 64
> +
> + __le64 bd_num = cpu_to_le64((u64)ring->pending_buf);
> +
> + /* make sure everything is visible to device before
> + * excuting tx push or updating doorbell
> + */
> + dma_wmb();
> +
> + __iowrite64_copy(ring->tqp->mem_base + HNS3_MEM_DOORBELL_OFFSET,
> + &bd_num, 1);
> + u64_stats_update_begin(&ring->syncp);
> + ring->stats.tx_mem_doorbell += ring->pending_buf;
> + u64_stats_update_end(&ring->syncp);
> +
> +#if defined(CONFIG_ARM64)
> + dgh();
> +#endif

Same here.

Thanks,

Will