[RFC v2 4/4] hisi_acc_vfio_pci: Add support for vfio live migration

From: Shameer Kolothum
Date: Fri Jul 02 2021 - 06:00:03 EST


From: Longfang Liu <liulongfang@xxxxxxxxxx>

VMs with HiSilicon ACC VF devices assigned to them can now perform
live migration, provided the VF devices are bound to the
hisi-acc-vfio-pci driver.

Signed-off-by: Longfang Liu <liulongfang@xxxxxxxxxx>
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@xxxxxxxxxx>
---
drivers/vfio/pci/Kconfig | 10 +-
drivers/vfio/pci/hisi_acc_vfio_pci.c | 1023 +++++++++++++++++++++++++-
drivers/vfio/pci/hisi_acc_vfio_pci.h | 144 ++++
3 files changed, 1170 insertions(+), 7 deletions(-)
create mode 100644 drivers/vfio/pci/hisi_acc_vfio_pci.h
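
For reference, here is a minimal sketch (not part of the patch) of the
save-side flow a userspace VMM might follow against the migration region
this patch exposes, using the v1 vfio_device_migration_info layout from
<linux/vfio.h>. The device fd, the region offset and the helper name
save_device_state() are illustrative assumptions; in practice the region
offset comes from VFIO_DEVICE_GET_REGION_INFO and every access needs
error handling.

#include <stddef.h>     /* offsetof */
#include <stdint.h>
#include <unistd.h>     /* pread, pwrite, write */
#include <sys/types.h>  /* off_t */
#include <linux/vfio.h> /* struct vfio_device_migration_info */

#define MIG_OFF(f) offsetof(struct vfio_device_migration_info, f)

static int save_device_state(int device_fd, off_t region_off, int out_fd)
{
    uint32_t state = VFIO_DEVICE_STATE_SAVING;  /* stop-and-copy phase */
    uint64_t pending, data_off, data_size;
    char buf[4096];

    /* Ask the driver to snapshot the VF state (vCPUs already stopped) */
    if (pwrite(device_fd, &state, sizeof(state),
               region_off + MIG_OFF(device_state)) != sizeof(state))
        return -1;

    for (;;) {
        /* How much device state is still left to read out? */
        if (pread(device_fd, &pending, sizeof(pending),
                  region_off + MIG_OFF(pending_bytes)) != sizeof(pending))
            return -1;
        if (!pending)
            break;

        /* Where in the region does the data sit, and how big is it? */
        pread(device_fd, &data_off, sizeof(data_off),
              region_off + MIG_OFF(data_offset));
        pread(device_fd, &data_size, sizeof(data_size),
              region_off + MIG_OFF(data_size));

        /* Stream this chunk of device state out to the migration stream */
        while (data_size) {
            size_t len = data_size < sizeof(buf) ? data_size : sizeof(buf);

            pread(device_fd, buf, len, region_off + data_off);
            write(out_fd, buf, len);
            data_off += len;
            data_size -= len;
        }
    }

    return 0;
}

The resume side mirrors this with the device placed in
VFIO_DEVICE_STATE_RESUMING and pwrite()s of data_size and the data area,
which is what drives hisi_acc_vf_match_check() and vf_qm_state_resume()
in the patch below.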

diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 709807c28153..25dec1bb7741 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -59,10 +59,14 @@ config MLX5_VFIO_PCI
If you don't know what to do here, say N.

config HISI_ACC_VFIO_PCI
- tristate "VFIO support for HiSilicon ACC devices"
+ tristate "VFIO live migration support for HiSilicon ACC devices"
depends on ARM64 && VFIO_PCI_CORE
+ select CRYPTO_DEV_HISI_QM
+ depends on PCI && PCI_MSI
+ depends on UACCE || UACCE=n
+ depends on ACPI
help
- This provides generic PCI support for HiSilicon devices using the VFIO
- framework.
+ This provides live migration support for HiSilicon ACC devices
+ using the VFIO framework.

If you don't know what to do here, say N.
diff --git a/drivers/vfio/pci/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisi_acc_vfio_pci.c
index d57cf6d7adaf..f7906a151def 100644
--- a/drivers/vfio/pci/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisi_acc_vfio_pci.c
@@ -12,6 +12,1015 @@
#include <linux/vfio.h>
#include <linux/vfio_pci_core.h>

+#include "hisi_acc_vfio_pci.h"
+
+/* Return 0 when the VM's acc device is ready, -ETIMEDOUT on hardware timeout */
+static int qm_wait_dev_ready(struct hisi_qm *qm)
+{
+ u32 val;
+
+ return readl_relaxed_poll_timeout(qm->io_base + QM_VF_STATE,
+ val, !(val & 0x1), MB_POLL_PERIOD_US,
+ MB_POLL_TIMEOUT_US);
+}
+
+/*
+ * Each state register is checked up to 100 times,
+ * with a delay of 100 microseconds after each check
+ */
+static u32 acc_check_reg_state(struct hisi_qm *qm, u32 regs)
+{
+ int check_times = 0;
+ u32 state;
+
+ state = readl(qm->io_base + regs);
+ while (state && check_times < ERROR_CHECK_TIMEOUT) {
+ udelay(CHECK_DELAY_TIME);
+ state = readl(qm->io_base + regs);
+ check_times++;
+ }
+
+ return state;
+}
+
+/* Check the PF's RAS state and Function INT state */
+static int qm_check_int_state(struct acc_vf_migration *acc_vf_dev)
+{
+ struct hisi_qm *vfqm = acc_vf_dev->vf_qm;
+ struct hisi_qm *qm = acc_vf_dev->pf_qm;
+ struct pci_dev *vf_pdev = acc_vf_dev->vf_dev;
+ struct device *dev = &qm->pdev->dev;
+ u32 state;
+
+ /* Check RAS state */
+ state = acc_check_reg_state(qm, QM_ABNORMAL_INT_STATUS);
+ if (state) {
+ dev_err(dev, "failed to check QM RAS state!\n");
+ return -EBUSY;
+ }
+
+ /* Check Function Communication state between PF and VF */
+ state = acc_check_reg_state(vfqm, QM_IFC_INT_STATUS);
+ if (state) {
+ dev_err(dev, "failed to check QM IFC INT state!\n");
+ return -EBUSY;
+ }
+ state = acc_check_reg_state(vfqm, QM_IFC_INT_SET_V);
+ if (state) {
+ dev_err(dev, "failed to check QM IFC INT SET state!\n");
+ return -EBUSY;
+ }
+
+ /* Check submodule task state */
+ switch (vf_pdev->device) {
+ case HISI_SEC_VF_DEV_ID:
+ state = acc_check_reg_state(qm, SEC_CORE_INT_STATUS);
+ if (state) {
+ dev_err(dev, "failed to check QM SEC Core INT state!\n");
+ return -EBUSY;
+ }
+ return 0;
+ case HISI_HPRE_VF_DEV_ID:
+ state = acc_check_reg_state(qm, HPRE_HAC_INT_STATUS);
+ if (state) {
+ dev_err(dev, "failed to check QM HPRE HAC INT state!\n");
+ return -EBUSY;
+ }
+ return 0;
+ case HISI_ZIP_VF_DEV_ID:
+ state = acc_check_reg_state(qm, HZIP_CORE_INT_STATUS);
+ if (state) {
+ dev_err(dev, "failed to check QM ZIP Core INT state!\n");
+ return -EBUSY;
+ }
+ return 0;
+ default:
+ dev_err(dev, "failed to detect acc module type!\n");
+ return -EINVAL;
+ }
+}
+
+static int qm_read_reg(struct hisi_qm *qm, u32 reg_addr,
+ u32 *data, u8 nums)
+{
+ int i;
+
+ if (nums < 1 || nums > QM_REGS_MAX_LEN)
+ return -EINVAL;
+
+ for (i = 0; i < nums; i++) {
+ data[i] = readl(qm->io_base + reg_addr);
+ reg_addr += QM_REG_ADDR_OFFSET;
+ }
+
+ return 0;
+}
+
+static int qm_write_reg(struct hisi_qm *qm, u32 reg,
+ u32 *data, u8 nums)
+{
+ int i;
+
+ if (nums < 1 || nums > QM_REGS_MAX_LEN)
+ return -EINVAL;
+
+ for (i = 0; i < nums; i++)
+ writel(data[i], qm->io_base + reg + i * QM_REG_ADDR_OFFSET);
+
+ return 0;
+}
+
+static int qm_get_vft(struct hisi_qm *qm, u32 *base)
+{
+ u64 sqc_vft;
+ u32 qp_num;
+ int ret;
+
+ ret = qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1);
+ if (ret)
+ return ret;
+
+ sqc_vft = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) |
+ ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) <<
+ QM_XQC_ADDR_OFFSET);
+ *base = QM_SQC_VFT_BASE_MASK_V2 & (sqc_vft >> QM_SQC_VFT_BASE_SHIFT_V2);
+ qp_num = (QM_SQC_VFT_NUM_MASK_V2 &
+ (sqc_vft >> QM_SQC_VFT_NUM_SHIFT_V2)) + 1;
+
+ return qp_num;
+}
+
+static int qm_get_sqc(struct hisi_qm *qm, u64 *addr)
+{
+ int ret;
+
+ ret = qm_mb(qm, QM_MB_CMD_SQC_BT, 0, 0, 1);
+ if (ret)
+ return ret;
+
+ *addr = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) |
+ ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) <<
+ QM_XQC_ADDR_OFFSET);
+
+ return 0;
+}
+
+static int qm_get_cqc(struct hisi_qm *qm, u64 *addr)
+{
+ int ret;
+
+ ret = qm_mb(qm, QM_MB_CMD_CQC_BT, 0, 0, 1);
+ if (ret)
+ return ret;
+
+ *addr = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) |
+ ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) <<
+ QM_XQC_ADDR_OFFSET);
+
+ return 0;
+}
+
+static int qm_rw_regs_read(struct hisi_qm *qm, struct acc_vf_data *vf_data)
+{
+ struct device *dev = &qm->pdev->dev;
+ int ret;
+
+ ret = qm_read_reg(qm, QM_VF_AEQ_INT_MASK, &vf_data->aeq_int_mask, 1);
+ if (ret) {
+ dev_err(dev, "failed to read QM_VF_AEQ_INT_MASK\n");
+ return ret;
+ }
+
+ ret = qm_read_reg(qm, QM_VF_EQ_INT_MASK, &vf_data->eq_int_mask, 1);
+ if (ret) {
+ dev_err(dev, "failed to read QM_VF_EQ_INT_MASK\n");
+ return ret;
+ }
+
+ ret = qm_read_reg(qm, QM_IFC_INT_SOURCE_V,
+ &vf_data->ifc_int_source, 1);
+ if (ret) {
+ dev_err(dev, "failed to read QM_IFC_INT_SOURCE_V\n");
+ return ret;
+ }
+
+ ret = qm_read_reg(qm, QM_IFC_INT_MASK, &vf_data->ifc_int_mask, 1);
+ if (ret) {
+ dev_err(dev, "failed to read QM_IFC_INT_MASK\n");
+ return ret;
+ }
+
+ ret = qm_read_reg(qm, QM_IFC_INT_SET_V, &vf_data->ifc_int_set, 1);
+ if (ret) {
+ dev_err(dev, "failed to read QM_IFC_INT_SET_V\n");
+ return ret;
+ }
+
+ ret = qm_read_reg(qm, QM_QUE_ISO_CFG_V, &vf_data->que_iso_cfg, 1);
+ if (ret) {
+ dev_err(dev, "failed to read QM_QUE_ISO_CFG_V\n");
+ return ret;
+ }
+
+ ret = qm_read_reg(qm, QM_PAGE_SIZE, &vf_data->page_size, 1);
+ if (ret) {
+ dev_err(dev, "failed to read QM_PAGE_SIZE\n");
+ return ret;
+ }
+
+ ret = qm_read_reg(qm, QM_VF_STATE, &vf_data->vf_state, 1);
+ if (ret) {
+ dev_err(dev, "failed to read QM_VF_STATE\n");
+ return ret;
+ }
+
+ /* QM_EQC_DW has 7 regs */
+ ret = qm_read_reg(qm, QM_EQC_DW0, vf_data->qm_eqc_dw, 7);
+ if (ret) {
+ dev_err(dev, "failed to read QM_EQC_DW\n");
+ return ret;
+ }
+
+ /* QM_AEQC_DW has 7 regs */
+ ret = qm_read_reg(qm, QM_AEQC_DW0, vf_data->qm_aeqc_dw, 7);
+ if (ret) {
+ dev_err(dev, "failed to read QM_AEQC_DW\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int qm_rw_regs_write(struct hisi_qm *qm, struct acc_vf_data *vf_data)
+{
+ struct device *dev = &qm->pdev->dev;
+ int ret;
+
+ /* Check that the VF's QM mailbox is ready before writing */
+ if (unlikely(qm_wait_mb_ready(qm))) {
+ dev_err(dev, "QM device is not ready to write\n");
+ return -EBUSY;
+ }
+
+ ret = qm_write_reg(qm, QM_VF_AEQ_INT_MASK, &vf_data->aeq_int_mask, 1);
+ if (ret) {
+ dev_err(dev, "failed to write QM_VF_AEQ_INT_MASK\n");
+ return ret;
+ }
+
+ ret = qm_write_reg(qm, QM_VF_EQ_INT_MASK, &vf_data->eq_int_mask, 1);
+ if (ret) {
+ dev_err(dev, "failed to write QM_VF_EQ_INT_MASK\n");
+ return ret;
+ }
+
+ ret = qm_write_reg(qm, QM_IFC_INT_SOURCE_V,
+ &vf_data->ifc_int_source, 1);
+ if (ret) {
+ dev_err(dev, "failed to write QM_IFC_INT_SOURCE_V\n");
+ return ret;
+ }
+
+ ret = qm_write_reg(qm, QM_IFC_INT_MASK, &vf_data->ifc_int_mask, 1);
+ if (ret) {
+ dev_err(dev, "failed to write QM_IFC_INT_MASK\n");
+ return ret;
+ }
+
+ ret = qm_write_reg(qm, QM_IFC_INT_SET_V, &vf_data->ifc_int_set, 1);
+ if (ret) {
+ dev_err(dev, "failed to write QM_IFC_INT_SET_V\n");
+ return ret;
+ }
+
+ ret = qm_write_reg(qm, QM_QUE_ISO_CFG_V, &vf_data->que_iso_cfg, 1);
+ if (ret) {
+ dev_err(dev, "failed to write QM_QUE_ISO_CFG_V\n");
+ return ret;
+ }
+
+ ret = qm_write_reg(qm, QM_PAGE_SIZE, &vf_data->page_size, 1);
+ if (ret) {
+ dev_err(dev, "failed to write QM_PAGE_SIZE\n");
+ return ret;
+ }
+
+ ret = qm_write_reg(qm, QM_VF_STATE, &vf_data->vf_state, 1);
+ if (ret) {
+ dev_err(dev, "failed to write QM_VF_STATE\n");
+ return ret;
+ }
+
+ /* QM_EQC_DW has 7 regs */
+ ret = qm_write_reg(qm, QM_EQC_DW0, vf_data->qm_eqc_dw, 7);
+ if (ret) {
+ dev_err(dev, "failed to write QM_EQC_DW\n");
+ return ret;
+ }
+
+ /* QM_AEQC_DW has 7 regs */
+ ret = qm_write_reg(qm, QM_AEQC_DW0, vf_data->qm_aeqc_dw, 7);
+ if (ret) {
+ dev_err(dev, "failed to write QM_AEQC_DW\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static void qm_db(struct hisi_qm *qm, u16 qn, u8 cmd,
+ u16 index, u8 priority)
+{
+ u64 doorbell;
+ u64 dbase;
+ u16 randata = 0;
+
+ if (cmd == QM_DOORBELL_CMD_SQ || cmd == QM_DOORBELL_CMD_CQ)
+ dbase = QM_DOORBELL_SQ_CQ_BASE_V2;
+ else
+ dbase = QM_DOORBELL_EQ_AEQ_BASE_V2;
+
+ doorbell = qn | ((u64)cmd << QM_DB_CMD_SHIFT_V2) |
+ ((u64)randata << QM_DB_RAND_SHIFT_V2) |
+ ((u64)index << QM_DB_INDEX_SHIFT_V2) |
+ ((u64)priority << QM_DB_PRIORITY_SHIFT_V2);
+
+ writeq(doorbell, qm->io_base + dbase);
+}
+
+static int vf_migration_data_store(struct hisi_qm *qm,
+ struct acc_vf_migration *acc_vf_dev)
+{
+ struct acc_vf_data *vf_data = acc_vf_dev->vf_data;
+ struct device *dev = &qm->pdev->dev;
+ int ret;
+
+ ret = qm_rw_regs_read(qm, vf_data);
+ if (ret)
+ return -EINVAL;
+
+ /* Each reg is 32 bits; the DMA address is 64 bits. */
+ vf_data->eqe_dma = vf_data->qm_eqc_dw[2];
+ vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
+ vf_data->eqe_dma |= vf_data->qm_eqc_dw[1];
+ vf_data->aeqe_dma = vf_data->qm_aeqc_dw[2];
+ vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
+ vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[1];
+
+ /* Use the SQC_BT/CQC_BT mailbox cmds to get the SQC and CQC addresses */
+ ret = qm_get_sqc(qm, &vf_data->sqc_dma);
+ if (ret) {
+ dev_err(dev, "failed to read SQC addr!\n");
+ return -EINVAL;
+ }
+
+ ret = qm_get_cqc(qm, &vf_data->cqc_dma);
+ if (ret) {
+ dev_err(dev, "failed to read CQC addr!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void qm_dev_cmd_init(struct hisi_qm *qm)
+{
+ /* Clear VF communication status registers. */
+ writel(0x1, qm->io_base + QM_IFC_INT_SOURCE_V);
+
+ /* Enable PF and VF communication. */
+ writel(0x0, qm->io_base + QM_IFC_INT_MASK);
+}
+
+static void vf_qm_fun_restart(struct hisi_qm *qm,
+ struct acc_vf_migration *acc_vf_dev)
+{
+ struct acc_vf_data *vf_data = acc_vf_dev->vf_data;
+ struct device *dev = &qm->pdev->dev;
+ int i;
+
+ /* Check if we need to restart VF */
+ if (vf_data->vf_state == VF_PREPARE) {
+ dev_info(dev, "No need to restart VF\n");
+ return;
+ }
+
+ for (i = 0; i < qm->qp_num; i++)
+ qm_db(qm, i, QM_DOORBELL_CMD_SQ, 0, 1);
+}
+
+static int vf_migration_data_recover(struct hisi_qm *qm,
+ struct acc_vf_migration *acc_vf_dev)
+{
+ struct device *dev = &qm->pdev->dev;
+ struct acc_vf_data *vf_data = acc_vf_dev->vf_data;
+ struct vfio_device_migration_info *mig_ctl = acc_vf_dev->mig_ctl;
+ int ret;
+
+ if (!mig_ctl->data_size)
+ return 0;
+
+ qm->eqe_dma = vf_data->eqe_dma;
+ qm->aeqe_dma = vf_data->aeqe_dma;
+ qm->sqc_dma = vf_data->sqc_dma;
+ qm->cqc_dma = vf_data->cqc_dma;
+
+ qm->qp_base = vf_data->qp_base;
+ qm->qp_num = vf_data->qp_num;
+
+ ret = qm_rw_regs_write(qm, vf_data);
+ if (ret) {
+ dev_err(dev, "Set VF regs failed\n");
+ return ret;
+ }
+
+ ret = qm_mb(qm, QM_MB_CMD_SQC_BT, qm->sqc_dma, 0, 0);
+ if (ret) {
+ dev_err(dev, "Set sqc failed\n");
+ return ret;
+ }
+
+ ret = qm_mb(qm, QM_MB_CMD_CQC_BT, qm->cqc_dma, 0, 0);
+ if (ret) {
+ dev_err(dev, "Set cqc failed\n");
+ return ret;
+ }
+
+ qm_dev_cmd_init(qm);
+
+ return 0;
+}
+
+static int vf_qm_cache_wb(struct hisi_qm *qm)
+{
+ unsigned int val;
+
+ writel(0x1, qm->io_base + QM_CACHE_WB_START);
+ if (readl_relaxed_poll_timeout(qm->io_base + QM_CACHE_WB_DONE,
+ val, val & BIT(0), MB_POLL_PERIOD_US,
+ MB_POLL_TIMEOUT_US)) {
+ dev_err(&qm->pdev->dev, "failed to write back VF QM SQC cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int vf_qm_func_stop(struct hisi_qm *qm)
+{
+ return qm_mb(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0);
+}
+
+static int pf_qm_get_qp_num(struct hisi_qm *qm, int vf_id, u32 *rbase)
+{
+ unsigned int val;
+ u64 sqc_vft;
+ u32 qp_num;
+ int ret;
+
+ ret = readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val,
+ val & BIT(0), MB_POLL_PERIOD_US,
+ MB_POLL_TIMEOUT_US);
+ if (ret)
+ return ret;
+
+ writel(0x1, qm->io_base + QM_VFT_CFG_OP_WR);
+ /* 0 means SQC VFT */
+ writel(0x0, qm->io_base + QM_VFT_CFG_TYPE);
+ writel(vf_id, qm->io_base + QM_VFT_CFG);
+
+ writel(0x0, qm->io_base + QM_VFT_CFG_RDY);
+ writel(0x1, qm->io_base + QM_VFT_CFG_OP_ENABLE);
+
+ ret = readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val,
+ val & BIT(0), MB_POLL_PERIOD_US,
+ MB_POLL_TIMEOUT_US);
+ if (ret)
+ return ret;
+
+ sqc_vft = readl(qm->io_base + QM_VFT_CFG_DATA_L) |
+ ((u64)readl(qm->io_base + QM_VFT_CFG_DATA_H) <<
+ QM_XQC_ADDR_OFFSET);
+ *rbase = QM_SQC_VFT_BASE_MASK_V2 &
+ (sqc_vft >> QM_SQC_VFT_BASE_SHIFT_V2);
+ qp_num = (QM_SQC_VFT_NUM_MASK_V2 &
+ (sqc_vft >> QM_SQC_VFT_NUM_SHIFT_V2)) + 1;
+
+ return qp_num;
+}
+
+static int pf_qm_state_pre_save(struct hisi_qm *qm,
+ struct acc_vf_migration *acc_vf_dev)
+{
+ struct acc_vf_data *vf_data = acc_vf_dev->vf_data;
+ struct vfio_device_migration_info *mig_ctl = acc_vf_dev->mig_ctl;
+ struct device *dev = &qm->pdev->dev;
+ int vf_id = acc_vf_dev->vf_id;
+ int ret;
+
+ /* save device id */
+ vf_data->dev_id = acc_vf_dev->vf_dev->device;
+
+ /* Save the VF's qp num obtained from the PF */
+ ret = pf_qm_get_qp_num(qm, vf_id, &qm->qp_base);
+ if (ret <= 0) {
+ dev_err(dev, "failed to get vft qp nums!\n");
+ return -EINVAL;
+ }
+ qm->qp_num = ret;
+ vf_data->qp_base = qm->qp_base;
+ vf_data->qp_num = qm->qp_num;
+
+ /* Save the VF's isolation state obtained from the PF */
+ ret = qm_read_reg(qm, QM_QUE_ISO_CFG, &vf_data->que_iso_cfg, 1);
+ if (ret) {
+ dev_err(dev, "failed to read QM_QUE_ISO_CFG!\n");
+ return ret;
+ }
+
+ mig_ctl->data_size = QM_MATCH_SIZE;
+ mig_ctl->pending_bytes = mig_ctl->data_size;
+
+ return 0;
+}
+
+static int vf_qm_state_save(struct hisi_qm *qm,
+ struct acc_vf_migration *acc_vf_dev)
+{
+ struct device *dev = &acc_vf_dev->vf_dev->dev;
+ struct vfio_device_migration_info *mig_ctl = acc_vf_dev->mig_ctl;
+ int ret;
+
+ mig_ctl->data_size = 0;
+ mig_ctl->pending_bytes = 0;
+
+ if (unlikely(qm_wait_dev_ready(qm))) {
+ dev_info(&qm->pdev->dev, "QM device not ready, no data to transfer\n");
+ return 0;
+ }
+
+ /* First stop the ACC vf function */
+ ret = vf_qm_func_stop(qm);
+ if (ret) {
+ dev_err(dev, "failed to stop QM VF function!\n");
+ return ret;
+ }
+
+ ret = qm_check_int_state(acc_vf_dev);
+ if (ret) {
+ dev_err(dev, "failed to check QM INT state!\n");
+ goto state_error;
+ }
+
+ ret = vf_qm_cache_wb(qm);
+ if (ret) {
+ dev_err(dev, "failed to writeback QM Cache!\n");
+ goto state_error;
+ }
+
+ ret = vf_migration_data_store(qm, acc_vf_dev);
+ if (ret) {
+ dev_err(dev, "failed to get and store migration data!\n");
+ goto state_error;
+ }
+
+ mig_ctl->data_size = sizeof(struct acc_vf_data);
+ mig_ctl->pending_bytes = mig_ctl->data_size;
+
+ return 0;
+
+state_error:
+ vf_qm_fun_restart(qm, acc_vf_dev);
+ return ret;
+}
+
+static int vf_qm_state_resume(struct hisi_qm *qm,
+ struct acc_vf_migration *acc_vf_dev)
+{
+ struct device *dev = &acc_vf_dev->vf_dev->dev;
+ int ret;
+
+ /* Restore the migration data to the VF */
+ ret = vf_migration_data_recover(qm, acc_vf_dev);
+ if (ret) {
+ dev_err(dev, "failed to recover the VF!\n");
+ return ret;
+ }
+
+ /* Restart all of the destination VF's QPs */
+ vf_qm_fun_restart(qm, acc_vf_dev);
+
+ return 0;
+}
+
+/*
+ * HiSilicon ACC VF dev MMIO space contains both the functional register
+ * space and the migration control register space. We hide the migration
+ * control space from the Guest. But to successfully complete the live
+ * migration, we still need access to the functional MMIO space assigned
+ * to the Guest. To avoid any potential security issues, we need to be
+ * careful not to access this region while the Guest vCPUs are running.
+ *
+ * Hence check the device state before we map the region.
+ */
+static int hisi_acc_vf_ioremap(struct acc_vf_migration *acc_vf_dev, u32 state)
+{
+ struct hisi_qm *vfqm = acc_vf_dev->vf_qm;
+ struct pci_dev *pdev = acc_vf_dev->vf_dev;
+
+ if (state == (VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RUNNING))
+ return -EINVAL;
+
+ if (vfqm->io_base)
+ return 0;
+
+ vfqm->io_base = ioremap(vfqm->phys_base,
+ pci_resource_len(pdev, VFIO_PCI_BAR2_REGION_INDEX));
+ if (!vfqm->io_base)
+ return -EIO;
+
+ return 0;
+}
+
+static void hisi_acc_vf_iounmap(struct acc_vf_migration *acc_vf_dev)
+{
+ struct hisi_qm *vfqm = acc_vf_dev->vf_qm;
+
+ if (!vfqm->io_base)
+ return;
+
+ iounmap(vfqm->io_base);
+ vfqm->io_base = NULL;
+}
+
+static int hisi_acc_vf_set_device_state(struct acc_vf_migration *acc_vf_dev,
+ u32 state)
+{
+ struct vfio_device_migration_info *mig_ctl = acc_vf_dev->mig_ctl;
+ struct hisi_qm *pfqm = acc_vf_dev->pf_qm;
+ struct hisi_qm *vfqm = acc_vf_dev->vf_qm;
+ int ret = 0;
+
+ if (state == mig_ctl->device_state)
+ return 0;
+
+ switch (state) {
+ case VFIO_DEVICE_STATE_RUNNING:
+ if (mig_ctl->device_state == VFIO_DEVICE_STATE_RESUMING) {
+ ret = hisi_acc_vf_ioremap(acc_vf_dev, state);
+ if (ret)
+ return ret;
+
+ ret = vf_qm_state_resume(vfqm, acc_vf_dev);
+ if (ret)
+ goto out;
+ }
+ break;
+ case VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RUNNING:
+ ret = pf_qm_state_pre_save(pfqm, acc_vf_dev);
+ if (ret)
+ return ret;
+
+ break;
+ case VFIO_DEVICE_STATE_SAVING:
+ ret = hisi_acc_vf_ioremap(acc_vf_dev, state);
+ if (ret)
+ return ret;
+
+ ret = vf_qm_state_save(vfqm, acc_vf_dev);
+ if (ret)
+ goto out;
+
+ break;
+ case VFIO_DEVICE_STATE_STOP:
+ ret = hisi_acc_vf_ioremap(acc_vf_dev, state);
+ if (ret)
+ return ret;
+
+ vf_qm_fun_restart(vfqm, acc_vf_dev);
+ break;
+ case VFIO_DEVICE_STATE_RESUMING:
+ break;
+ default:
+ return -EFAULT;
+ }
+
+ mig_ctl->device_state = state;
+
+out:
+ hisi_acc_vf_iounmap(acc_vf_dev);
+ return ret;
+}
+
+static int hisi_acc_vf_match_check(struct acc_vf_migration *acc_vf_dev)
+{
+ struct vfio_device_migration_info *mig_ctl = acc_vf_dev->mig_ctl;
+ struct acc_vf_data *vf_data = acc_vf_dev->vf_data;
+ struct hisi_qm *qm = acc_vf_dev->vf_qm;
+ struct device *dev = &qm->pdev->dev;
+ u32 que_iso_state;
+ int ret;
+
+ /*
+ * Check we are in the correct dev state and have enough data to
+ * perform the check.
+ */
+ if (mig_ctl->device_state != VFIO_DEVICE_STATE_RESUMING ||
+ mig_ctl->data_size != QM_MATCH_SIZE)
+ return 0;
+
+ /* vf acc dev type check */
+ if (vf_data->dev_id != acc_vf_dev->vf_dev->device) {
+ dev_err(dev, "failed to match VF devices\n");
+ return -EINVAL;
+ }
+
+ ret = hisi_acc_vf_ioremap(acc_vf_dev, mig_ctl->device_state);
+ if (ret)
+ return ret;
+
+ /* vf qp num check */
+ ret = qm_get_vft(qm, &qm->qp_base);
+ if (ret <= 0) {
+ dev_err(dev, "failed to get vft qp nums\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ qm->qp_num = ret;
+
+ if (vf_data->qp_num != qm->qp_num) {
+ dev_err(dev, "failed to match VF qp num\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* vf isolation state check */
+ ret = qm_read_reg(qm, QM_QUE_ISO_CFG_V, &que_iso_state, 1);
+ if (ret) {
+ dev_err(dev, "failed to read QM_QUE_ISO_CFG_V\n");
+ goto out;
+ }
+ if (vf_data->que_iso_cfg != que_iso_state) {
+ dev_err(dev, "failed to match isolation state\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* clear the VF match data size */
+ mig_ctl->pending_bytes = 0;
+ mig_ctl->data_size = 0;
+
+out:
+ hisi_acc_vf_iounmap(acc_vf_dev);
+ return ret;
+}
+
+static int hisi_acc_vf_data_transfer(struct acc_vf_migration *acc_vf_dev,
+ char __user *buf, size_t count, u64 pos,
+ bool iswrite)
+{
+ struct vfio_device_migration_info *mig_ctl = acc_vf_dev->mig_ctl;
+ void *data_addr = acc_vf_dev->vf_data;
+ int ret = 0;
+
+ if (!count || pos < mig_ctl->data_offset || pos > MIGRATION_REGION_SZ)
+ return -EINVAL;
+
+ data_addr += pos - mig_ctl->data_offset;
+ if (iswrite) {
+ ret = copy_from_user(data_addr, buf, count);
+ if (ret)
+ return -EFAULT;
+
+ mig_ctl->pending_bytes += count;
+ } else {
+ ret = copy_to_user(buf, data_addr, count);
+ if (ret)
+ return -EFAULT;
+
+ mig_ctl->pending_bytes -= count;
+ }
+
+ return count;
+}
+
+static size_t hisi_acc_vf_migrn_rw(struct vfio_pci_core_device *vdev,
+ char __user *buf, size_t count, loff_t *ppos,
+ bool iswrite)
+{
+ unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
+ VFIO_PCI_NUM_REGIONS;
+ struct vfio_pci_region *region = &vdev->region[index];
+ struct acc_vf_migration *acc_vf_dev;
+ struct vfio_device_migration_info *mig_ctl;
+ u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;
+ struct device *dev;
+ u32 device_state;
+ int ret;
+
+ if (!region)
+ return -EINVAL;
+
+ acc_vf_dev = region->data;
+ dev = &acc_vf_dev->vf_dev->dev;
+ mig_ctl = acc_vf_dev->mig_ctl;
+
+ if (pos >= region->size)
+ return -EINVAL;
+
+ count = min(count, (size_t)(region->size - pos));
+
+ switch (pos) {
+ case VDM_OFFSET(device_state):
+ if (count != sizeof(mig_ctl->device_state))
+ return -EFAULT;
+
+ if (iswrite) {
+ ret = copy_from_user(&device_state, buf, count);
+ if (ret)
+ return -EFAULT;
+
+ ret = hisi_acc_vf_set_device_state(acc_vf_dev, device_state);
+ if (ret)
+ return ret;
+ } else {
+ ret = copy_to_user(buf, &mig_ctl->device_state, count);
+ if (ret)
+ return -EFAULT;
+ }
+
+ return count;
+ case VDM_OFFSET(reserved):
+ return 0;
+ case VDM_OFFSET(pending_bytes):
+ if (count != sizeof(mig_ctl->pending_bytes))
+ return -EINVAL;
+
+ if (iswrite)
+ return -EFAULT;
+
+ ret = copy_to_user(buf, &mig_ctl->pending_bytes, count);
+ if (ret)
+ return -EFAULT;
+
+ return count;
+ case VDM_OFFSET(data_offset):
+ if (count != sizeof(mig_ctl->data_offset))
+ return -EINVAL;
+
+ if (iswrite) {
+ ret = copy_from_user(&mig_ctl->data_offset, buf, count);
+ if (ret)
+ return -EFAULT;
+ } else {
+ ret = copy_to_user(buf, &mig_ctl->data_offset, count);
+ if (ret)
+ return -EFAULT;
+ }
+
+ return count;
+ case VDM_OFFSET(data_size):
+ if (count != sizeof(mig_ctl->data_size))
+ return -EINVAL;
+
+ if (iswrite) {
+ ret = copy_from_user(&mig_ctl->data_size, buf, count);
+ if (ret)
+ return -EFAULT;
+
+ /* Check whether the src and dst VFs match */
+ ret = hisi_acc_vf_match_check(acc_vf_dev);
+ if (ret)
+ return ret;
+ } else {
+ ret = copy_to_user(buf, &mig_ctl->data_size, count);
+ if (ret)
+ return -EFAULT;
+ }
+
+ return count;
+ default:
+ /* Transfer data */
+ return hisi_acc_vf_data_transfer(acc_vf_dev, buf,
+ count, pos, iswrite);
+ }
+}
+
+static void hisi_acc_vfio_pci_uninit(struct acc_vf_migration *acc_vf_dev)
+{
+ kfree(acc_vf_dev->mig_ctl);
+ kfree(acc_vf_dev->vf_qm);
+}
+
+static void hisi_acc_vf_migrn_release(struct vfio_pci_core_device *vdev,
+ struct vfio_pci_region *region)
+{
+ struct acc_vf_migration *acc_vf_dev = region->data;
+
+ hisi_acc_vfio_pci_uninit(acc_vf_dev);
+ kfree(acc_vf_dev);
+}
+
+static const struct vfio_pci_regops hisi_acc_vfio_pci_regops = {
+ .rw = hisi_acc_vf_migrn_rw,
+ .release = hisi_acc_vf_migrn_release,
+};
+
+static int hisi_acc_vf_dev_init(struct pci_dev *pdev, struct hisi_qm *pf_qm,
+ struct acc_vf_migration *acc_vf_dev)
+{
+ struct vfio_device_migration_info *mig_ctl;
+ struct hisi_qm *vf_qm;
+
+ vf_qm = kzalloc(sizeof(*vf_qm), GFP_KERNEL);
+ if (!vf_qm)
+ return -ENOMEM;
+
+ vf_qm->dev_name = pf_qm->dev_name;
+ vf_qm->fun_type = QM_HW_VF;
+ vf_qm->phys_base = pci_resource_start(pdev, VFIO_PCI_BAR2_REGION_INDEX);
+ vf_qm->pdev = pdev;
+ mutex_init(&vf_qm->mailbox_lock);
+
+ acc_vf_dev->vf_qm = vf_qm;
+ acc_vf_dev->pf_qm = pf_qm;
+
+ /* The data region must immediately follow the migration info */
+ mig_ctl = kzalloc(MIGRATION_REGION_SZ, GFP_KERNEL);
+ if (!mig_ctl)
+ goto init_qm_error;
+
+ acc_vf_dev->mig_ctl = mig_ctl;
+
+ acc_vf_dev->vf_data = (void *)(mig_ctl + 1);
+
+ mig_ctl->device_state = VFIO_DEVICE_STATE_RUNNING;
+ mig_ctl->data_offset = sizeof(*mig_ctl);
+ mig_ctl->data_size = sizeof(struct acc_vf_data);
+
+ return 0;
+
+init_qm_error:
+ kfree(vf_qm);
+ return -ENOMEM;
+}
+
+static int hisi_acc_vfio_pci_init(struct vfio_pci_core_device *vdev)
+{
+ struct acc_vf_migration *acc_vf_dev;
+ struct pci_dev *pdev = vdev->pdev;
+ struct pci_dev *pf_dev, *vf_dev;
+ struct hisi_qm *pf_qm;
+ int vf_id, ret;
+
+ pf_dev = pdev->physfn;
+ vf_dev = pdev;
+
+ pf_qm = pci_get_drvdata(pf_dev);
+ if (!pf_qm) {
+ pr_err("HiSi ACC qm driver not loaded\n");
+ return -EINVAL;
+ }
+
+ if (pf_qm->ver < QM_HW_V3) {
+ dev_err(&pdev->dev,
+ "Migration not supported, hw version: 0x%x\n",
+ pf_qm->ver);
+ return -ENODEV;
+ }
+
+ vf_id = PCI_FUNC(vf_dev->devfn);
+ acc_vf_dev = kzalloc(sizeof(*acc_vf_dev), GFP_KERNEL);
+ if (!acc_vf_dev)
+ return -ENOMEM;
+
+ acc_vf_dev->vf_id = vf_id;
+ acc_vf_dev->pf_dev = pf_dev;
+ acc_vf_dev->vf_dev = vf_dev;
+
+ ret = hisi_acc_vf_dev_init(pdev, pf_qm, acc_vf_dev);
+ if (ret) {
+ kfree(acc_vf_dev);
+ return -ENOMEM;
+ }
+
+ ret = vfio_pci_register_dev_region(vdev, VFIO_REGION_TYPE_MIGRATION,
+ VFIO_REGION_SUBTYPE_MIGRATION,
+ &hisi_acc_vfio_pci_regops,
+ MIGRATION_REGION_SZ,
+ VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE,
+ acc_vf_dev);
+ if (ret)
+ goto out;
+
+ return 0;
+out:
+ hisi_acc_vfio_pci_uninit(acc_vf_dev);
+ kfree(acc_vf_dev);
+ return ret;
+}
+
static long hisi_acc_vfio_pci_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
unsigned long arg)
{
@@ -64,6 +1073,12 @@ static int hisi_acc_vfio_pci_open(struct vfio_device *core_vdev)
if (ret)
return ret;

+ ret = hisi_acc_vfio_pci_init(vdev);
+ if (ret) {
+ vfio_pci_core_disable(vdev);
+ return ret;
+ }
+
vfio_pci_core_finish_enable(vdev);

return 0;
@@ -113,9 +1128,9 @@ static void hisi_acc_vfio_pci_remove(struct pci_dev *pdev)
}

static const struct pci_device_id hisi_acc_vfio_pci_table[] = {
- { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_HUAWEI, 0xa256) }, /* SEC VF */
- { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_HUAWEI, 0xa259) }, /* HPRE VF */
- { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_HUAWEI, 0xa251) }, /* ZIP VF */
+ { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_HUAWEI, HISI_SEC_VF_DEV_ID) },
+ { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_HUAWEI, HISI_HPRE_VF_DEV_ID) },
+ { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_HUAWEI, HISI_ZIP_VF_DEV_ID) },
{ 0, }
};

@@ -137,4 +1152,4 @@ module_pci_driver(hisi_acc_vfio_pci_driver);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Liu Longfang <liulongfang@xxxxxxxxxx>");
MODULE_AUTHOR("Shameer Kolothum <shameerali.kolothum.thodi@xxxxxxxxxx>");
-MODULE_DESCRIPTION("HiSilicon VFIO PCI - Generic VFIO PCI driver for HiSilicon ACC device family");
+MODULE_DESCRIPTION("HiSilicon VFIO PCI - VFIO PCI driver with live migration support for HiSilicon ACC device family");
diff --git a/drivers/vfio/pci/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisi_acc_vfio_pci.h
new file mode 100644
index 000000000000..f3f8c38d6f1a
--- /dev/null
+++ b/drivers/vfio/pci/hisi_acc_vfio_pci.h
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 HiSilicon Ltd. */
+
+#ifndef HISI_ACC_VFIO_PCI_H
+#define HISI_ACC_VFIO_PCI_H
+
+#include "../../drivers/crypto/hisilicon/qm.h"
+
+#define VDM_OFFSET(x) offsetof(struct vfio_device_migration_info, x)
+#define MIGRATION_REGION_SZ (sizeof(struct acc_vf_data) + \
+ sizeof(struct vfio_device_migration_info))
+
+#define HISI_SEC_VF_DEV_ID 0xa256
+#define HISI_HPRE_VF_DEV_ID 0xa259
+#define HISI_ZIP_VF_DEV_ID 0xa251
+
+#define MB_POLL_PERIOD_US 10
+#define MB_POLL_TIMEOUT_US 1000
+#define QM_CACHE_WB_START 0x204
+#define QM_CACHE_WB_DONE 0x208
+#define QM_MB_CMD_PAUSE_QM 0xe
+#define QM_ABNORMAL_INT_STATUS 0x100008
+#define QM_IFC_INT_STATUS 0x0028
+#define SEC_CORE_INT_STATUS 0x301008
+#define HPRE_HAC_INT_STATUS 0x301800
+#define HZIP_CORE_INT_STATUS 0x3010AC
+#define QM_QUE_ISO_CFG 0x301154
+
+#define QM_VFT_CFG_RDY 0x10006c
+#define QM_VFT_CFG_OP_WR 0x100058
+#define QM_VFT_CFG_TYPE 0x10005c
+#define QM_VFT_CFG 0x100060
+#define QM_VFT_CFG_OP_ENABLE 0x100054
+#define QM_VFT_CFG_DATA_L 0x100064
+#define QM_VFT_CFG_DATA_H 0x100068
+
+#define ERROR_CHECK_TIMEOUT 100
+#define CHECK_DELAY_TIME 100
+
+#define QM_SQC_VFT_BASE_SHIFT_V2 28
+#define QM_SQC_VFT_BASE_MASK_V2 GENMASK(15, 0)
+#define QM_SQC_VFT_NUM_SHIFT_V2 45
+#define QM_SQC_VFT_NUM_MASK_V2 GENMASK(9, 0)
+
+/* mailbox */
+#define QM_MB_CMD_SQC_BT 0x4
+#define QM_MB_CMD_CQC_BT 0x5
+#define QM_MB_CMD_SQC_VFT_V2 0x6
+
+#define QM_MB_CMD_SEND_BASE 0x300
+#define QM_MB_BUSY_SHIFT 13
+#define QM_MB_OP_SHIFT 14
+#define QM_MB_CMD_DATA_ADDR_L 0x304
+#define QM_MB_CMD_DATA_ADDR_H 0x308
+#define QM_MB_MAX_WAIT_CNT 6000
+
+/* doorbell */
+#define QM_DOORBELL_CMD_SQ 0
+#define QM_DOORBELL_CMD_CQ 1
+#define QM_DOORBELL_SQ_CQ_BASE_V2 0x1000
+#define QM_DOORBELL_EQ_AEQ_BASE_V2 0x2000
+#define QM_DB_CMD_SHIFT_V2 12
+#define QM_DB_RAND_SHIFT_V2 16
+#define QM_DB_INDEX_SHIFT_V2 32
+#define QM_DB_PRIORITY_SHIFT_V2 48
+
+/* RW regs */
+#define QM_REGS_MAX_LEN 7
+#define QM_REG_ADDR_OFFSET 0x0004
+
+#define QM_XQC_ADDR_OFFSET 32U
+#define QM_VF_AEQ_INT_MASK 0x0004
+#define QM_VF_EQ_INT_MASK 0x000c
+#define QM_IFC_INT_SOURCE_V 0x0020
+#define QM_IFC_INT_MASK 0x0024
+#define QM_IFC_INT_SET_V 0x002c
+#define QM_QUE_ISO_CFG_V 0x0030
+#define QM_PAGE_SIZE 0x0034
+#define QM_VF_STATE 0x0060
+
+#define QM_EQC_DW0 0x8000
+#define QM_AEQC_DW0 0x8020
+
+#define QM_MATCH_SIZE 32L
+
+enum vf_state {
+ VF_READY = 0x0,
+ VF_NOT_READY,
+ VF_PREPARE,
+};
+
+struct acc_vf_data {
+ /* QM match information */
+ u32 qp_num;
+ u32 dev_id;
+ u32 que_iso_cfg;
+ u32 qp_base;
+ /* 4 reserved QM match information regs */
+ u32 qm_rsv_state[4];
+
+ /* QM RW regs */
+ u32 aeq_int_mask;
+ u32 eq_int_mask;
+ u32 ifc_int_source;
+ u32 ifc_int_mask;
+ u32 ifc_int_set;
+ u32 page_size;
+ u32 vf_state;
+
+ /*
+ * QM_VF_MB has 4 regs that don't need to be migrated:
+ * writing back the mailbox regs' values would cause the
+ * hardware to perform command operations.
+ */
+
+ /* QM_EQC_DW has 7 regs */
+ u32 qm_eqc_dw[7];
+
+ /* QM_AEQC_DW has 7 regs */
+ u32 qm_aeqc_dw[7];
+
+ /* QM reserved 5 regs */
+ u32 qm_rsv_regs[5];
+
+ /* qm memory init information */
+ dma_addr_t eqe_dma;
+ dma_addr_t aeqe_dma;
+ dma_addr_t sqc_dma;
+ dma_addr_t cqc_dma;
+};
+
+struct acc_vf_migration {
+ struct pci_dev *pf_dev;
+ struct pci_dev *vf_dev;
+ struct hisi_qm *pf_qm;
+ struct hisi_qm *vf_qm;
+ int vf_id;
+
+ struct vfio_device_migration_info *mig_ctl;
+ struct acc_vf_data *vf_data;
+};
+
+#endif /* HISI_ACC_VFIO_PCI_H */
+
--
2.17.1