Re: [RFC 1/2] vhost: IFC VF hardware operation layer

From: Jason Wang
Date: Mon Oct 21 2019 - 06:35:46 EST



On 2019/10/21 6:00 PM, Zhu, Lingshan wrote:

On 10/16/2019 4:40 PM, Jason Wang wrote:

On 2019/10/16 9:30 AM, Zhu Lingshan wrote:
This commit introduces the ifcvf_base layer, which handles IFC VF NIC
hardware operations and configuration.


It's better to describe the difference between the IFC VF and virtio in the commit log, or is there an open doc for this?


Hi Jason,

Sure, I will split this code into small patches with detailed commit logs in the v1 patchset.

Signed-off-by: Zhu Lingshan <lingshan.zhu@xxxxxxxxx>
---
 drivers/vhost/ifcvf/ifcvf_base.c | 390 +++++++++++++++++++++++++++++++++++++++
 drivers/vhost/ifcvf/ifcvf_base.h | 137 ++++++++++++++
 2 files changed, 527 insertions(+)
 create mode 100644 drivers/vhost/ifcvf/ifcvf_base.c
 create mode 100644 drivers/vhost/ifcvf/ifcvf_base.h

diff --git a/drivers/vhost/ifcvf/ifcvf_base.c b/drivers/vhost/ifcvf/ifcvf_base.c
new file mode 100644
index 000000000000..b85e14c9bdcf
--- /dev/null
+++ b/drivers/vhost/ifcvf/ifcvf_base.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2019 Intel Corporation.
+ */
+
+#include "ifcvf_base.h"
+
+static void *get_cap_addr(struct ifcvf_hw *hw, struct virtio_pci_cap *cap)
+{
+        u8 bar = cap->bar;
+        u32 length = cap->length;
+        u32 offset = cap->offset;
+        struct ifcvf_adapter *ifcvf =
+                container_of(hw, struct ifcvf_adapter, vf);
+
+        if (bar >= IFCVF_PCI_MAX_RESOURCE) {
+                IFC_ERR(ifcvf->dev,
+                        "Invalid bar number %u to get capabilities.\n", bar);
+                return NULL;
+        }
+
+        if (offset + length < offset) {
+                IFC_ERR(ifcvf->dev, "offset(%u) + length(%u) overflows\n",
+                        offset, length);
+                return NULL;
+        }
+
+        if (offset + length > hw->mem_resource[cap->bar].len) {
+                IFC_ERR(ifcvf->dev,
+                        "offset(%u) + len(%u) overflows bar%u to get capabilities.\n",
+                        offset, length, bar);
+                return NULL;
+        }
+
+        return hw->mem_resource[bar].addr + offset;
+}
+
+int ifcvf_read_config_range(struct pci_dev *dev,
+                            uint32_t *val, int size, int where)
+{
+        int i;
+
+        for (i = 0; i < size; i += 4) {
+                if (pci_read_config_dword(dev, where + i, val + i / 4) < 0)
+                        return -1;
+        }
+        return 0;
+}
+
+int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *dev)
+{
+        int ret;
+        u8 pos;
+        struct virtio_pci_cap cap;
+        u32 i;
+        u16 notify_off;
+
+        ret = pci_read_config_byte(dev, PCI_CAPABILITY_LIST, &pos);
+
+        if (ret < 0) {
+                IFC_ERR(&dev->dev, "failed to read PCI capability list.\n");
+                return -EIO;
+        }
+
+        while (pos) {
+                ret = ifcvf_read_config_range(dev, (u32 *)&cap,
+                                sizeof(cap), pos);
+
+                if (ret < 0) {
+                        IFC_ERR(&dev->dev, "failed to get PCI capability at %x",
+                                pos);
+                        break;
+                }
+
+                if (cap.cap_vndr != PCI_CAP_ID_VNDR)
+                        goto next;
+
+                IFC_INFO(&dev->dev, "read PCI config:\n"
+                         "config type: %u.\n"
+                         "PCI bar: %u.\n"
+                         "PCI bar offset: %u.\n"
+                         "PCI config len: %u.\n",
+                         cap.cfg_type, cap.bar,
+                         cap.offset, cap.length);
+
+                switch (cap.cfg_type) {
+                case VIRTIO_PCI_CAP_COMMON_CFG:
+                        hw->common_cfg = get_cap_addr(hw, &cap);
+                        IFC_INFO(&dev->dev, "hw->common_cfg = %p.\n",
+                                 hw->common_cfg);
+                        break;
+                case VIRTIO_PCI_CAP_NOTIFY_CFG:
+                        pci_read_config_dword(dev, pos + sizeof(cap),
+                                &hw->notify_off_multiplier);
+                        hw->notify_bar = cap.bar;
+                        hw->notify_base = get_cap_addr(hw, &cap);
+                        IFC_INFO(&dev->dev, "hw->notify_base = %p.\n",
+                                 hw->notify_base);
+                        break;
+                case VIRTIO_PCI_CAP_ISR_CFG:
+                        hw->isr = get_cap_addr(hw, &cap);
+                        IFC_INFO(&dev->dev, "hw->isr = %p.\n", hw->isr);
+                        break;
+                case VIRTIO_PCI_CAP_DEVICE_CFG:
+                        hw->dev_cfg = get_cap_addr(hw, &cap);
+                        IFC_INFO(&dev->dev, "hw->dev_cfg = %p.\n", hw->dev_cfg);
+                        break;
+                }
+next:
+                pos = cap.cap_next;
+        }
+
+        if (hw->common_cfg == NULL || hw->notify_base == NULL ||
+            hw->isr == NULL || hw->dev_cfg == NULL) {
+                IFC_ERR(&dev->dev, "Incomplete PCI capabilities.\n");
+                return -1;
+        }
+
+        for (i = 0; i < (IFCVF_MAX_QUEUE_PAIRS * 2); i++) {


Any reason for using a hard-coded queue pair limit rather than max_queue_pairs from the net config?
Hi Jason, thanks for your kind comments. For now the driver doesn't support MQ; we intend to provide a minimal feature set in this version 1 driver.


Ok, it's better to add a comment above IFCVF_MAX_QUEUE_PAIRS.
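For instance, a comment along these lines above the define (the wording here is only a sketch, not part of the patch):

/*
 * MQ is not supported yet; this version of the driver provides a
 * minimal feature set, so the queue pair limit is hard coded to 1
 * rather than taken from max_virtqueue_pairs in the net config.
 */
#define IFCVF_MAX_QUEUE_PAIRS   1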




+                iowrite16(i, &hw->common_cfg->queue_select);
+                notify_off = ioread16(&hw->common_cfg->queue_notify_off);
+                hw->notify_addr[i] = (void *)((u8 *)hw->notify_base +
+                                notify_off * hw->notify_off_multiplier);
+        }
+
+        hw->lm_cfg = hw->mem_resource[4].addr;
+
+        IFC_INFO(&dev->dev, "PCI capability mapping:\n"
+                 "common cfg: %p\n"
+                 "notify base: %p\n"
+                 "isr cfg: %p\n"
+                 "device cfg: %p\n"
+                 "multiplier: %u\n",
+                 hw->common_cfg,
+                 hw->notify_base,
+                 hw->isr,
+                 hw->dev_cfg,
+                 hw->notify_off_multiplier);
+
+        return 0;
+}
+
+static u8 ifcvf_get_status(struct ifcvf_hw *hw)
+{
+        return ioread8(&hw->common_cfg->device_status);
+}
+
+static void ifcvf_set_status(struct ifcvf_hw *hw, u8 status)
+{
+        iowrite8(status, &hw->common_cfg->device_status);
+}
+
+static void ifcvf_reset(struct ifcvf_hw *hw)
+{
+        ifcvf_set_status(hw, 0);
+
+        /* flush status write */
+        ifcvf_get_status(hw);


Why is this flush needed?

According to PCIe requirements, this get_status() after a set_status() is used to block the call chain and make sure the hardware has finished the write operation.

It is a bad comment anyway; I will remove it.


Interesting, does this mean we also need to fix vp_set_status() in the kernel virtio_pci driver?
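For reference, the pattern under discussion is just a read-back after a posted MMIO write; a minimal sketch of the idea (the helper name here is made up, not from the patch):

static void ifcvf_set_status_flushed(struct ifcvf_hw *hw, u8 status)
{
        iowrite8(status, &hw->common_cfg->device_status);
        /* read back so the posted write reaches the device before
         * the caller proceeds
         */
        (void)ioread8(&hw->common_cfg->device_status);
}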





+        hw->generation++;
+}
+
+static void ifcvf_add_status(struct ifcvf_hw *hw, u8 status)
+{
+        if (status != 0)
+                status |= ifcvf_get_status(hw);
+
+        ifcvf_set_status(hw, status);
+        ifcvf_get_status(hw);
+}
+
+u64 ifcvf_get_features(struct ifcvf_hw *hw)
+{
+        u32 features_lo, features_hi;
+        struct virtio_pci_common_cfg *cfg = hw->common_cfg;
+
+        iowrite32(0, &cfg->device_feature_select);
+        features_lo = ioread32(&cfg->device_feature);
+
+        iowrite32(1, &cfg->device_feature_select);
+        features_hi = ioread32(&cfg->device_feature);
+
+        return ((u64)features_hi << 32) | features_lo;
+}
+
+static int ifcvf_with_feature(struct ifcvf_hw *hw, u64 bit)
+{
+        return (hw->req_features & (1ULL << bit)) != 0;
+}
+
+static void ifcvf_read_dev_config(struct ifcvf_hw *hw, u64 offset,
+                                  void *dst, int length)
+{
+        int i;
+        u8 *p;
+        u8 old_gen, new_gen;
+
+        do {
+                old_gen = ioread8(&hw->common_cfg->config_generation);
+
+                p = dst;
+                for (i = 0; i < length; i++)
+                        *p++ = ioread8((u8 *)hw->dev_cfg + offset + i);
+
+                new_gen = ioread8(&hw->common_cfg->config_generation);
+        } while (old_gen != new_gen);
+}
+
+void ifcvf_get_linkstatus(struct ifcvf_hw *hw, u8 *is_linkup)
+{


Why not just return a boolean?
Sure, can do.
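A boolean variant might look roughly like this (a sketch only; the actual v1 code may differ):

static bool ifcvf_get_linkstatus(struct ifcvf_hw *hw)
{
        u16 status;

        /* without VIRTIO_NET_F_STATUS there is no status field to read */
        if (!ifcvf_with_feature(hw, VIRTIO_NET_F_STATUS))
                return false;

        ifcvf_read_dev_config(hw,
                        offsetof(struct ifcvf_net_config, status),
                        &status, sizeof(status));

        return status & VIRTIO_NET_S_LINK_UP;
}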


+        u16 status;
+        u64 host_features;
+
+        host_features = ifcvf_get_features(hw);
+        if (ifcvf_with_feature(hw, VIRTIO_NET_F_STATUS)) {
+                ifcvf_read_dev_config(hw,
+                                offsetof(struct ifcvf_net_config, status),
+                                &status, sizeof(status));
+                if ((status & VIRTIO_NET_S_LINK_UP) == 0)
+                        (*is_linkup) = 0;
+                else
+                        (*is_linkup) = 1;
+        } else
+                (*is_linkup) = 0;
+}
+
+static void ifcvf_set_features(struct ifcvf_hw *hw, u64 features)
+{
+        struct virtio_pci_common_cfg *cfg = hw->common_cfg;
+
+        iowrite32(0, &cfg->guest_feature_select);
+        iowrite32(features & ((1ULL << 32) - 1), &cfg->guest_feature);
+
+        iowrite32(1, &cfg->guest_feature_select);
+        iowrite32(features >> 32, &cfg->guest_feature);
+}
+
+static int ifcvf_config_features(struct ifcvf_hw *hw)
+{
+        u64 host_features;
+        struct ifcvf_adapter *ifcvf =
+                container_of(hw, struct ifcvf_adapter, vf);
+
+        host_features = ifcvf_get_features(hw);
+        hw->req_features &= host_features;


Is this a must? Can't the device deal with this?
I will use hw->req_features directly, thanks for pointing it out.
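With that change, ifcvf_config_features() would reduce to something like this sketch:

static int ifcvf_config_features(struct ifcvf_hw *hw)
{
        struct ifcvf_adapter *ifcvf =
                container_of(hw, struct ifcvf_adapter, vf);

        /* req_features already holds the negotiated set; program it as-is */
        ifcvf_set_features(hw, hw->req_features);
        ifcvf_add_status(hw, VIRTIO_CONFIG_S_FEATURES_OK);

        if (!(ifcvf_get_status(hw) & VIRTIO_CONFIG_S_FEATURES_OK)) {
                IFC_ERR(ifcvf->dev, "Failed to set FEATURES_OK status\n");
                return -EIO;
        }

        return 0;
}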


+
+        ifcvf_set_features(hw, hw->req_features);
+        ifcvf_add_status(hw, VIRTIO_CONFIG_S_FEATURES_OK);
+
+        if (!(ifcvf_get_status(hw) & VIRTIO_CONFIG_S_FEATURES_OK)) {
+                IFC_ERR(ifcvf->dev, "Failed to set FEATURES_OK status\n");
+                return -EIO;
+        }
+
+        return 0;
+}
+
+static void io_write64_twopart(u64 val, u32 *lo, u32 *hi)
+{
+        iowrite32(val & ((1ULL << 32) - 1), lo);
+        iowrite32(val >> 32, hi);
+}
+
+static int ifcvf_hw_enable(struct ifcvf_hw *hw)
+{
+        struct virtio_pci_common_cfg *cfg;
+        u8 *lm_cfg;
+        u32 i;
+        struct ifcvf_adapter *ifcvf =
+                container_of(hw, struct ifcvf_adapter, vf);
+
+        cfg = hw->common_cfg;
+        lm_cfg = hw->lm_cfg;
+
+        iowrite16(IFCVF_MSI_CONFIG_OFF, &cfg->msix_config);
+        if (ioread16(&cfg->msix_config) == VIRTIO_MSI_NO_VECTOR) {
+                IFC_ERR(ifcvf->dev, "No msix vector for device config.\n");
+                return -1;
+        }
+
+        for (i = 0; i < hw->nr_vring; i++) {
+                iowrite16(i, &cfg->queue_select);
+                io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
+                                &cfg->queue_desc_hi);
+                io_write64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo,
+                                &cfg->queue_avail_hi);
+                io_write64_twopart(hw->vring[i].used, &cfg->queue_used_lo,
+                                &cfg->queue_used_hi);
+                iowrite16(hw->vring[i].size, &cfg->queue_size);
+
+                *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+                                (i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
+                        (u32)hw->vring[i].last_avail_idx |
+                        ((u32)hw->vring[i].last_used_idx << 16);
+
+                iowrite16(i + IFCVF_MSI_QUEUE_OFF, &cfg->queue_msix_vector);
+                if (ioread16(&cfg->queue_msix_vector) ==
+                                VIRTIO_MSI_NO_VECTOR) {
+                        IFC_ERR(ifcvf->dev,
+                                "No msix vector for queue %u.\n", i);
+                        return -1;
+                }
+
+                iowrite16(1, &cfg->queue_enable);
+        }
+
+        return 0;
+}
+
+static void ifcvf_hw_disable(struct ifcvf_hw *hw)
+{
+        u32 i;
+        struct virtio_pci_common_cfg *cfg;
+
+        cfg = hw->common_cfg;
+
+        iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->msix_config);
+        for (i = 0; i < hw->nr_vring; i++) {
+                iowrite16(i, &cfg->queue_select);
+                iowrite16(0, &cfg->queue_enable);
+                iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->queue_msix_vector);
+        }
+}
+
+int ifcvf_start_hw(struct ifcvf_hw *hw)
+{
+        ifcvf_reset(hw);
+        ifcvf_add_status(hw, VIRTIO_CONFIG_S_ACKNOWLEDGE);
+        ifcvf_add_status(hw, VIRTIO_CONFIG_S_DRIVER);
+
+        if (ifcvf_config_features(hw) < 0)
+                return -1;
+
+        if (ifcvf_hw_enable(hw) < 0)
+                return -1;
+
+        ifcvf_add_status(hw, VIRTIO_CONFIG_S_DRIVER_OK);
+
+        return 0;
+}
+
+void ifcvf_stop_hw(struct ifcvf_hw *hw)
+{
+        ifcvf_hw_disable(hw);
+        ifcvf_reset(hw);
+}
+
+void ifcvf_enable_logging_vf(struct ifcvf_hw *hw, u64 log_base, u64 log_size)
+{
+        u8 *lm_cfg;
+
+        lm_cfg = hw->lm_cfg;
+
+        *(u32 *)(lm_cfg + IFCVF_LM_BASE_ADDR_LOW) =
+                log_base & IFCVF_32_BIT_MASK;
+
+        *(u32 *)(lm_cfg + IFCVF_LM_BASE_ADDR_HIGH) =
+                (log_base >> 32) & IFCVF_32_BIT_MASK;
+
+        *(u32 *)(lm_cfg + IFCVF_LM_END_ADDR_LOW) =
+                (log_base + log_size) & IFCVF_32_BIT_MASK;
+
+        *(u32 *)(lm_cfg + IFCVF_LM_END_ADDR_HIGH) =
+                ((log_base + log_size) >> 32) & IFCVF_32_BIT_MASK;
+
+        *(u32 *)(lm_cfg + IFCVF_LM_LOGGING_CTRL) = IFCVF_LM_ENABLE_VF;
+}


Is the device using IOVA or GPA for the logging?
GPA. I will remove all LM-related functions, since we plan to support LM in the next version of the driver.


Ok, that's why vIOMMU is not fully supported in this case. So we need:

1) Filter out _F_IOMMU_PLATFORM for vhost-mdev

2) Can keep it for virtio-mdev

But one more question: how does the device know which kind of address is used? Or I guess the device doesn't know, and the only problem is the IOVA->GPA conversion in the case of vIOMMU. If this is true, maybe we can introduce an API to sync dirty pages and do the conversion there, instead of using shared memory as the log the way current vhost does.
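Such an API might be shaped roughly like this (entirely hypothetical; the name and signature are invented for illustration):

/*
 * Copy the device's dirty state for [start, start + size) into
 * 'bitmap', doing the IOVA->GPA conversion here when a vIOMMU is
 * in use, instead of having the device write a shared log.
 */
int ifcvf_sync_dirty_pages(struct ifcvf_hw *hw, u64 start, u64 size,
                           unsigned long *bitmap);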




+
+void ifcvf_disable_logging(struct ifcvf_hw *hw)
+{
+        u8 *lm_cfg;
+
+        lm_cfg = hw->lm_cfg;
+        *(u32 *)(lm_cfg + IFCVF_LM_LOGGING_CTRL) = IFCVF_LM_DISABLE;
+}
+
+void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid)
+{
+        iowrite16(qid, hw->notify_addr[qid]);
+}
+
+u8 ifcvf_get_notify_region(struct ifcvf_hw *hw)
+{
+        return hw->notify_bar;
+}
+
+u64 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid)
+{
+        return (u8 *)hw->notify_addr[qid] -
+                (u8 *)hw->mem_resource[hw->notify_bar].addr;
+}
diff --git a/drivers/vhost/ifcvf/ifcvf_base.h b/drivers/vhost/ifcvf/ifcvf_base.h
new file mode 100644
index 000000000000..1ab1a1c40f24
--- /dev/null
+++ b/drivers/vhost/ifcvf/ifcvf_base.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2019 Intel Corporation.
+ */
+
+#ifndef _IFCVF_H_
+#define _IFCVF_H_
+
+#include <linux/virtio_mdev.h>
+#include <linux/pci.h>
+#include <linux/pci_regs.h>
+#include <uapi/linux/virtio_net.h>
+#include <uapi/linux/virtio_config.h>
+#include <uapi/linux/virtio_pci.h>
+
+#define IFCVF_VENDOR_ID         0x1AF4
+#define IFCVF_DEVICE_ID         0x1041
+#define IFCVF_SUBSYS_VENDOR_ID  0x8086
+#define IFCVF_SUBSYS_DEVICE_ID  0x001A
+
+/*
+ * Some ifcvf feature bits (currently bits 28 through 31) are
+ * reserved for the transport being used (e.g. ifcvf_ring); the
+ * rest are per-device feature bits.
+ */
+#define IFCVF_TRANSPORT_F_START 28
+#define IFCVF_TRANSPORT_F_END   34
+
+#define IFC_SUPPORTED_FEATURES \
+        ((1ULL << VIRTIO_NET_F_MAC)                     | \
+         (1ULL << VIRTIO_F_ANY_LAYOUT)                  | \
+         (1ULL << VIRTIO_F_VERSION_1)                   | \
+         (1ULL << VHOST_F_LOG_ALL)                      | \
+         (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE)          | \
+         (1ULL << VIRTIO_NET_F_CTRL_VQ)                 | \
+         (1ULL << VIRTIO_NET_F_STATUS)                  | \
+         (1ULL << VIRTIO_NET_F_MRG_RXBUF)) /* not fully supported */
+
+#define IFCVF_MAX_QUEUE_PAIRS           1
+#define IFCVF_MAX_QUEUES                2
+
+#define IFCVF_QUEUE_ALIGNMENT           PAGE_SIZE
+
+#define IFCVF_MSI_CONFIG_OFF    0
+#define IFCVF_MSI_QUEUE_OFF     1
+#define IFCVF_PCI_MAX_RESOURCE  6
+
+/* 46 bit CPU physical address, avoid overlap */
+#define LM_IOVA 0x400000000000
+
+#define IFCVF_LM_CFG_SIZE               0x40
+#define IFCVF_LM_RING_STATE_OFFSET      0x20
+
+#define IFCVF_LM_LOGGING_CTRL           0x0
+
+#define IFCVF_LM_BASE_ADDR_LOW          0x10
+#define IFCVF_LM_BASE_ADDR_HIGH         0x14
+#define IFCVF_LM_END_ADDR_LOW           0x18
+#define IFCVF_LM_END_ADDR_HIGH          0x1c
+
+#define IFCVF_LM_DISABLE                0x0
+#define IFCVF_LM_ENABLE_VF              0x1
+#define IFCVF_LM_ENABLE_PF              0x3
+
+#define IFCVF_32_BIT_MASK               0xffffffff
+
+#define IFC_ERR(dev, fmt, ...)  dev_err(dev, fmt, ##__VA_ARGS__)
+#define IFC_INFO(dev, fmt, ...) dev_info(dev, fmt, ##__VA_ARGS__)
+
+struct ifcvf_net_config {
+        u8    mac[6];
+        u16   status;
+        u16   max_virtqueue_pairs;
+} __packed;
+
+struct ifcvf_pci_mem_resource {
+        u64      phys_addr; /**< Physical address, 0 if not resource. */
+        u64      len;       /**< Length of the resource. */
+        u8       *addr;     /**< Virtual address, NULL when not mapped. */
+};
+
+struct vring_info {
+        u64 desc;
+        u64 avail;
+        u64 used;
+        u16 size;
+        u16 last_avail_idx;
+        u16 last_used_idx;
+        bool ready;
+        char msix_name[256];
+        struct virtio_mdev_callback cb;
+};
+
+struct ifcvf_hw {
+        u8      *isr;
+        u8      notify_bar;
+        u8      *lm_cfg;
+        u8      status;
+        u8      nr_vring;


Is this the number of queues currently in use?
Do you mean nr_vring? Yes, it is used in the hardware enable/disable functions.


+        u16     *notify_base;
+        u16     *notify_addr[IFCVF_MAX_QUEUE_PAIRS * 2];
+        u32     generation;
+        u32     notify_off_multiplier;
+        u64     req_features;
+        struct  virtio_pci_common_cfg *common_cfg;
+        struct  ifcvf_net_config *dev_cfg;
+        struct  vring_info vring[IFCVF_MAX_QUEUE_PAIRS * 2];
+        struct  ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
+};
+
+#define IFC_PRIVATE_TO_VF(adapter) \
+        (&((struct ifcvf_adapter *)adapter)->vf)
+
+#define IFCVF_MAX_INTR (IFCVF_MAX_QUEUE_PAIRS * 2 + 1)


Does the extra one mean the config interrupt?
Yes.


Ok, when we support control vq, it should be changed to 2*N + 2.
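i.e. something like this (a sketch of the future change, not in this patch):

/* 2 * N data virtqueues + 1 control vq + 1 config interrupt */
#define IFCVF_MAX_INTR  (IFCVF_MAX_QUEUE_PAIRS * 2 + 2)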

Thanks




+
+struct ifcvf_adapter {
+        struct  device *dev;
+        struct  mutex mdev_lock;


Not used in this patch; move it to the next one?
Sure, these unused ones will be moved to the small patches where they are used in the v1 patchset.


+        int     mdev_count;


Not used.


+        struct  list_head dma_maps;


This is not used.

Thanks


+        int     vectors;
+        struct  ifcvf_hw vf;
+};
+
+int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *dev);
+u64 ifcvf_get_features(struct ifcvf_hw *hw);
+int ifcvf_start_hw(struct ifcvf_hw *hw);
+void ifcvf_stop_hw(struct ifcvf_hw *hw);
+void ifcvf_enable_logging(struct ifcvf_hw *hw, u64 log_base, u64 log_size);
+void ifcvf_enable_logging_vf(struct ifcvf_hw *hw, u64 log_base, u64 log_size);
+void ifcvf_disable_logging(struct ifcvf_hw *hw);
+void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid);
+void ifcvf_get_linkstatus(struct ifcvf_hw *hw, u8 *is_linkup);
+u8 ifcvf_get_notify_region(struct ifcvf_hw *hw);
+u64 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
+
+#endif /* _IFCVF_H_ */