On Tue, Jun 22, 2021 at 07:11:10PM +0800, Guangbin Huang wrote:
When the driver writes the device space mapped to the WriteCombine,
From: Huazhong Tan <tanhuazhong@xxxxxxxxxx>
For the device that supports the TX push capability, the BD can
be directly copied to the device memory. However, due to hardware
restrictions, the push mode can be used only when there are no
more than two BDs, otherwise, the doorbell mode based on device
memory is used.
Signed-off-by: Huazhong Tan <tanhuazhong@xxxxxxxxxx>
Signed-off-by: Yufeng Mo <moyufeng@xxxxxxxxxx>
---
drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 +
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 83 ++++++++++++++++++++--
drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 6 ++
drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 2 +
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c | 2 +
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 11 ++-
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 8 +++
.../ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c | 2 +
.../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 11 ++-
.../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 8 +++
10 files changed, 126 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 0b202f4def83..3979d5d2e842 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -163,6 +163,7 @@ struct hnae3_handle;
struct hnae3_queue {
void __iomem *io_base;
+ void __iomem *mem_base;
struct hnae3_ae_algo *ae_algo;
struct hnae3_handle *handle;
int tqp_index; /* index in a handle */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index cdb5f14fb6bc..8649bd8e1b57 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2002,9 +2002,77 @@ static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring,
return bd_num;
}
+static void hns3_tx_push_bd(struct hns3_enet_ring *ring, int num)
+{
+#define HNS3_BYTES_PER_64BIT 8
+
+ struct hns3_desc desc[HNS3_MAX_PUSH_BD_NUM] = {};
+ int offset = 0;
+
+ /* make sure everything is visible to device before
+ * executing tx push or updating doorbell
+ */
+ dma_wmb();
+
+ do {
+ int idx = (ring->next_to_use - num + ring->desc_num) %
+ ring->desc_num;
+
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.tx_push++;
+ u64_stats_update_end(&ring->syncp);
+ memcpy(&desc[offset], &ring->desc[idx],
+ sizeof(struct hns3_desc));
+ offset++;
+ } while (--num);
+
+ __iowrite64_copy(ring->tqp->mem_base, desc,
+ (sizeof(struct hns3_desc) * HNS3_MAX_PUSH_BD_NUM) /
+ HNS3_BYTES_PER_64BIT);
+
+#if defined(CONFIG_ARM64)
+ dgh();
+#endif
It looks a bit weird putting this at the end of the function, given that
it's supposed to do something to a pair of accesses. Please can you explain
what it's doing, and also provide some numbers to show that it's worthwhile
(given that it's a performance hint not a correctness thing afaict).
Thanks,
+}
+
+static void hns3_tx_mem_doorbell(struct hns3_enet_ring *ring)
+{
+#define HNS3_MEM_DOORBELL_OFFSET 64
+
+ __le64 bd_num = cpu_to_le64((u64)ring->pending_buf);
+
+ /* make sure everything is visible to device before
+ * executing tx push or updating doorbell
+ */
+ dma_wmb();
+
+ __iowrite64_copy(ring->tqp->mem_base + HNS3_MEM_DOORBELL_OFFSET,
+ &bd_num, 1);
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.tx_mem_doorbell += ring->pending_buf;
+ u64_stats_update_end(&ring->syncp);
+
+#if defined(CONFIG_ARM64)
+ dgh();
+#endif
Same here.
Thanks,
Will
.