Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.
在 2023/3/13 20:10, Gautam Dawar 写道:
On 3/10/23 10:35, Jason Wang wrote:
Caution: This message originated from an External Source. Use propervdpa_state is set EF100_VDPA_STATE_STARTED during DRIVER_OK handling.
caution when opening attachments, clicking links, or responding.
On Tue, Mar 7, 2023 at 7:38 PM Gautam Dawar <gautam.dawar@xxxxxxx>
wrote:
vDPA config opertions to handle get/set device status and deviceIt looks to me that this duplicates with the DRIVER_OK status bit.
reset have been implemented. Also .suspend config operation is
implemented to support Live Migration.
Signed-off-by: Gautam Dawar <gautam.dawar@xxxxxxx>
---
drivers/net/ethernet/sfc/ef100_vdpa.c | 16 +-
drivers/net/ethernet/sfc/ef100_vdpa.h | 2 +
drivers/net/ethernet/sfc/ef100_vdpa_ops.c | 367
++++++++++++++++++++--
3 files changed, 355 insertions(+), 30 deletions(-)
diff --git a/drivers/net/ethernet/sfc/ef100_vdpa.c
b/drivers/net/ethernet/sfc/ef100_vdpa.c
index c66e5aef69ea..4ba57827a6cd 100644
--- a/drivers/net/ethernet/sfc/ef100_vdpa.c
+++ b/drivers/net/ethernet/sfc/ef100_vdpa.c
@@ -68,9 +68,14 @@ static int vdpa_allocate_vis(struct efx_nic *efx,
unsigned int *allocated_vis)
static void ef100_vdpa_delete(struct efx_nic *efx)
{
+ struct vdpa_device *vdpa_dev;
+
if (efx->vdpa_nic) {
+ vdpa_dev = &efx->vdpa_nic->vdpa_dev;
+ ef100_vdpa_reset(vdpa_dev);
+
/* replace with _vdpa_unregister_device later */
- put_device(&efx->vdpa_nic->vdpa_dev.dev);
+ put_device(&vdpa_dev->dev);
}
efx_mcdi_free_vis(efx);
}
@@ -171,6 +176,15 @@ static struct ef100_vdpa_nic
*ef100_vdpa_create(struct efx_nic *efx,
}
}
+ rc = devm_add_action_or_reset(&efx->pci_dev->dev,
+ ef100_vdpa_irq_vectors_free,
+ efx->pci_dev);
+ if (rc) {
+ pci_err(efx->pci_dev,
+ "Failed adding devres for freeing irq
vectors\n");
+ goto err_put_device;
+ }
+
rc = get_net_config(vdpa_nic);
if (rc)
goto err_put_device;
diff --git a/drivers/net/ethernet/sfc/ef100_vdpa.h
b/drivers/net/ethernet/sfc/ef100_vdpa.h
index 348ca8a7404b..58791402e454 100644
--- a/drivers/net/ethernet/sfc/ef100_vdpa.h
+++ b/drivers/net/ethernet/sfc/ef100_vdpa.h
@@ -149,6 +149,8 @@ int ef100_vdpa_register_mgmtdev(struct efx_nic
*efx);
void ef100_vdpa_unregister_mgmtdev(struct efx_nic *efx);
void ef100_vdpa_irq_vectors_free(void *data);
int ef100_vdpa_init_vring(struct ef100_vdpa_nic *vdpa_nic, u16 idx);
+void ef100_vdpa_irq_vectors_free(void *data);
+int ef100_vdpa_reset(struct vdpa_device *vdev);
static inline bool efx_vdpa_is_little_endian(struct ef100_vdpa_nic
*vdpa_nic)
{
diff --git a/drivers/net/ethernet/sfc/ef100_vdpa_ops.c
b/drivers/net/ethernet/sfc/ef100_vdpa_ops.c
index 0051c4c0e47c..95a2177f85a2 100644
--- a/drivers/net/ethernet/sfc/ef100_vdpa_ops.c
+++ b/drivers/net/ethernet/sfc/ef100_vdpa_ops.c
@@ -22,11 +22,6 @@ static struct ef100_vdpa_nic *get_vdpa_nic(struct
vdpa_device *vdev)
return container_of(vdev, struct ef100_vdpa_nic, vdpa_dev);
}
-void ef100_vdpa_irq_vectors_free(void *data)
-{
- pci_free_irq_vectors(data);
-}
-
static int create_vring_ctx(struct ef100_vdpa_nic *vdpa_nic, u16 idx)
{
struct efx_vring_ctx *vring_ctx;
@@ -52,14 +47,6 @@ static void delete_vring_ctx(struct
ef100_vdpa_nic *vdpa_nic, u16 idx)
vdpa_nic->vring[idx].vring_ctx = NULL;
}
-static void reset_vring(struct ef100_vdpa_nic *vdpa_nic, u16 idx)
-{
- vdpa_nic->vring[idx].vring_type = EF100_VDPA_VQ_NTYPES;
- vdpa_nic->vring[idx].vring_state = 0;
- vdpa_nic->vring[idx].last_avail_idx = 0;
- vdpa_nic->vring[idx].last_used_idx = 0;
-}
-
int ef100_vdpa_init_vring(struct ef100_vdpa_nic *vdpa_nic, u16 idx)
{
u32 offset;
@@ -103,6 +90,236 @@ static bool is_qid_invalid(struct
ef100_vdpa_nic *vdpa_nic, u16 idx,
return false;
}
+static void irq_vring_fini(struct ef100_vdpa_nic *vdpa_nic, u16 idx)
+{
+ struct ef100_vdpa_vring_info *vring = &vdpa_nic->vring[idx];
+ struct pci_dev *pci_dev = vdpa_nic->efx->pci_dev;
+
+ devm_free_irq(&pci_dev->dev, vring->irq, vring);
+ vring->irq = -EINVAL;
+}
+
+static irqreturn_t vring_intr_handler(int irq, void *arg)
+{
+ struct ef100_vdpa_vring_info *vring = arg;
+
+ if (vring->cb.callback)
+ return vring->cb.callback(vring->cb.private);
+
+ return IRQ_NONE;
+}
+
+static int ef100_vdpa_irq_vectors_alloc(struct pci_dev *pci_dev,
u16 nvqs)
+{
+ int rc;
+
+ rc = pci_alloc_irq_vectors(pci_dev, nvqs, nvqs, PCI_IRQ_MSIX);
+ if (rc < 0)
+ pci_err(pci_dev,
+ "Failed to alloc %d IRQ vectors, err:%d\n",
nvqs, rc);
+ return rc;
+}
+
+void ef100_vdpa_irq_vectors_free(void *data)
+{
+ pci_free_irq_vectors(data);
+}
+
+static int irq_vring_init(struct ef100_vdpa_nic *vdpa_nic, u16 idx)
+{
+ struct ef100_vdpa_vring_info *vring = &vdpa_nic->vring[idx];
+ struct pci_dev *pci_dev = vdpa_nic->efx->pci_dev;
+ int irq;
+ int rc;
+
+ snprintf(vring->msix_name, 256, "x_vdpa[%s]-%d\n",
+ pci_name(pci_dev), idx);
+ irq = pci_irq_vector(pci_dev, idx);
+ rc = devm_request_irq(&pci_dev->dev, irq,
vring_intr_handler, 0,
+ vring->msix_name, vring);
+ if (rc)
+ pci_err(pci_dev,
+ "devm_request_irq failed for vring %d, rc
%d\n",
+ idx, rc);
+ else
+ vring->irq = irq;
+
+ return rc;
+}
+
+static int delete_vring(struct ef100_vdpa_nic *vdpa_nic, u16 idx)
+{
+ struct efx_vring_dyn_cfg vring_dyn_cfg;
+ int rc;
+
+ if (!(vdpa_nic->vring[idx].vring_state & EF100_VRING_CREATED))
+ return 0;
+
+ rc = efx_vdpa_vring_destroy(vdpa_nic->vring[idx].vring_ctx,
+ &vring_dyn_cfg);
+ if (rc)
+ dev_err(&vdpa_nic->vdpa_dev.dev,
+ "%s: delete vring failed index:%u, err:%d\n",
+ __func__, idx, rc);
+ vdpa_nic->vring[idx].last_avail_idx = vring_dyn_cfg.avail_idx;
+ vdpa_nic->vring[idx].last_used_idx = vring_dyn_cfg.used_idx;
+ vdpa_nic->vring[idx].vring_state &= ~EF100_VRING_CREATED;
+
+ irq_vring_fini(vdpa_nic, idx);
+
+ return rc;
+}
+
+static void ef100_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
+{
+ struct ef100_vdpa_nic *vdpa_nic = get_vdpa_nic(vdev);
+ u32 idx_val;
+
+ if (is_qid_invalid(vdpa_nic, idx, __func__))
+ return;
+
+ if (!(vdpa_nic->vring[idx].vring_state & EF100_VRING_CREATED))
+ return;
+
+ idx_val = idx;
+ _efx_writed(vdpa_nic->efx, cpu_to_le32(idx_val),
+ vdpa_nic->vring[idx].doorbell_offset);
+}
+
+static bool can_create_vring(struct ef100_vdpa_nic *vdpa_nic, u16 idx)
+{
+ if (vdpa_nic->vring[idx].vring_state ==
EF100_VRING_CONFIGURED &&
+ vdpa_nic->status & VIRTIO_CONFIG_S_DRIVER_OK &&
+ !(vdpa_nic->vring[idx].vring_state & EF100_VRING_CREATED))
+ return true;
+
+ return false;
+}
+
+static int create_vring(struct ef100_vdpa_nic *vdpa_nic, u16 idx)
+{
+ struct efx_vring_dyn_cfg vring_dyn_cfg;
+ struct efx_vring_cfg vring_cfg;
+ int rc;
+
+ rc = irq_vring_init(vdpa_nic, idx);
+ if (rc) {
+ dev_err(&vdpa_nic->vdpa_dev.dev,
+ "%s: irq_vring_init failed. index:%u,
err:%d\n",
+ __func__, idx, rc);
+ return rc;
+ }
+ vring_cfg.desc = vdpa_nic->vring[idx].desc;
+ vring_cfg.avail = vdpa_nic->vring[idx].avail;
+ vring_cfg.used = vdpa_nic->vring[idx].used;
+ vring_cfg.size = vdpa_nic->vring[idx].size;
+ vring_cfg.features = vdpa_nic->features;
+ vring_cfg.msix_vector = idx;
+ vring_dyn_cfg.avail_idx = vdpa_nic->vring[idx].last_avail_idx;
+ vring_dyn_cfg.used_idx = vdpa_nic->vring[idx].last_used_idx;
+
+ rc = efx_vdpa_vring_create(vdpa_nic->vring[idx].vring_ctx,
+ &vring_cfg, &vring_dyn_cfg);
+ if (rc) {
+ dev_err(&vdpa_nic->vdpa_dev.dev,
+ "%s: vring_create failed index:%u, err:%d\n",
+ __func__, idx, rc);
+ goto err_vring_create;
+ }
+ vdpa_nic->vring[idx].vring_state |= EF100_VRING_CREATED;
+
+ /* A VQ kick allows the device to read the avail_idx, which
will be
+ * required at the destination after live migration.
+ */
+ ef100_vdpa_kick_vq(&vdpa_nic->vdpa_dev, idx);
+
+ return 0;
+
+err_vring_create:
+ irq_vring_fini(vdpa_nic, idx);
+ return rc;
+}
+
+static void reset_vring(struct ef100_vdpa_nic *vdpa_nic, u16 idx)
+{
+ delete_vring(vdpa_nic, idx);
+ vdpa_nic->vring[idx].vring_type = EF100_VDPA_VQ_NTYPES;
+ vdpa_nic->vring[idx].vring_state = 0;
+ vdpa_nic->vring[idx].last_avail_idx = 0;
+ vdpa_nic->vring[idx].last_used_idx = 0;
+}
+
+static void ef100_reset_vdpa_device(struct ef100_vdpa_nic *vdpa_nic)
+{
+ int i;
+
+ WARN_ON(!mutex_is_locked(&vdpa_nic->lock));
+
+ if (!vdpa_nic->status)
+ return;
+
+ vdpa_nic->vdpa_state = EF100_VDPA_STATE_INITIALIZED;
+ vdpa_nic->status = 0;
+ vdpa_nic->features = 0;
+ for (i = 0; i < (vdpa_nic->max_queue_pairs * 2); i++)
+ reset_vring(vdpa_nic, i);
+ ef100_vdpa_irq_vectors_free(vdpa_nic->efx->pci_dev);
+}
+
+/* May be called under the rtnl lock */
+int ef100_vdpa_reset(struct vdpa_device *vdev)
+{
+ struct ef100_vdpa_nic *vdpa_nic = get_vdpa_nic(vdev);
+
+ /* vdpa device can be deleted anytime but the bar_config
+ * could still be vdpa and hence efx->state would be
STATE_VDPA.
+ * Accordingly, ensure vdpa device exists before reset handling
+ */
+ if (!vdpa_nic)
+ return -ENODEV;
+
+ mutex_lock(&vdpa_nic->lock);
+ ef100_reset_vdpa_device(vdpa_nic);
+ mutex_unlock(&vdpa_nic->lock);
+ return 0;
+}
+
+static int start_vdpa_device(struct ef100_vdpa_nic *vdpa_nic)
+{
+ struct efx_nic *efx = vdpa_nic->efx;
+ struct ef100_nic_data *nic_data;
+ int i, j;
+ int rc;
+
+ nic_data = efx->nic_data;
+ rc = ef100_vdpa_irq_vectors_alloc(efx->pci_dev,
+ vdpa_nic->max_queue_pairs * 2);
+ if (rc < 0) {
+ pci_err(efx->pci_dev,
+ "vDPA IRQ alloc failed for vf: %u err:%d\n",
+ nic_data->vf_index, rc);
+ return rc;
+ }
+
+ for (i = 0; i < (vdpa_nic->max_queue_pairs * 2); i++) {
+ if (can_create_vring(vdpa_nic, i)) {
+ rc = create_vring(vdpa_nic, i);
+ if (rc)
+ goto clear_vring;
+ }
+ }
+
+ vdpa_nic->vdpa_state = EF100_VDPA_STATE_STARTED;
See my later response for its purpose.
+ return 0;It's better not to work around userspace bugs in the kernel. We should
+
+clear_vring:
+ for (j = 0; j < i; j++)
+ delete_vring(vdpa_nic, j);
+
+ ef100_vdpa_irq_vectors_free(efx->pci_dev);
+ return rc;
+}
+
static int ef100_vdpa_set_vq_address(struct vdpa_device *vdev,
u16 idx, u64 desc_area, u64
driver_area,
u64 device_area)
@@ -144,22 +361,6 @@ static void ef100_vdpa_set_vq_num(struct
vdpa_device *vdev, u16 idx, u32 num)
mutex_unlock(&vdpa_nic->lock);
}
-static void ef100_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
-{
- struct ef100_vdpa_nic *vdpa_nic = get_vdpa_nic(vdev);
- u32 idx_val;
-
- if (is_qid_invalid(vdpa_nic, idx, __func__))
- return;
-
- if (!(vdpa_nic->vring[idx].vring_state & EF100_VRING_CREATED))
- return;
-
- idx_val = idx;
- _efx_writed(vdpa_nic->efx, cpu_to_le32(idx_val),
- vdpa_nic->vring[idx].doorbell_offset);
-}
-
static void ef100_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx,
struct vdpa_callback *cb)
{
@@ -176,6 +377,7 @@ static void ef100_vdpa_set_vq_ready(struct
vdpa_device *vdev, u16 idx,
bool ready)
{
struct ef100_vdpa_nic *vdpa_nic = get_vdpa_nic(vdev);
+ int rc;
if (is_qid_invalid(vdpa_nic, idx, __func__))
return;
@@ -184,9 +386,21 @@ static void ef100_vdpa_set_vq_ready(struct
vdpa_device *vdev, u16 idx,
if (ready) {
vdpa_nic->vring[idx].vring_state |=
EF100_VRING_READY_CONFIGURED;
+ if (vdpa_nic->vdpa_state == EF100_VDPA_STATE_STARTED &&
+ can_create_vring(vdpa_nic, idx)) {
+ rc = create_vring(vdpa_nic, idx);
+ if (rc)
+ /* Rollback ready configuration
+ * So that the above layer driver
+ * can make another attempt to set
ready
+ */
+ vdpa_nic->vring[idx].vring_state &=
+ ~EF100_VRING_READY_CONFIGURED;
+ }
} else {
vdpa_nic->vring[idx].vring_state &=
~EF100_VRING_READY_CONFIGURED;
+ delete_vring(vdpa_nic, idx);
}
mutex_unlock(&vdpa_nic->lock);
}
@@ -296,6 +510,12 @@ static u64
ef100_vdpa_get_device_features(struct vdpa_device *vdev)
}
features |= BIT_ULL(VIRTIO_NET_F_MAC);
+ /* As QEMU SVQ doesn't implement the following features,
+ * masking them off to allow Live Migration
+ */
+ features &= ~BIT_ULL(VIRTIO_F_IN_ORDER);
+ features &= ~BIT_ULL(VIRTIO_F_ORDER_PLATFORM);
fix Qemu instead.
There's already a QEMU patch [1] submitted to support
VIRTIO_F_ORDER_PLATFORM but it hasn't concluded yet. Also, there is no
support for VIRTIO_F_IN_ORDER in the kernel virtio driver. The motive
of this change is to have VM Live Migration working with the kernel
in-tree driver without requiring any changes.
Once QEMU is able to handle these features, we can submit a patch to
undo these changes.
I can understand the motivation, but it works for prototyping but not
formal kernel code (especially consider SVQ is not mature and still
being development). What's more, we can not assume Qemu is the only
user, we have other users like DPDK and cloud-hypervisors.
Sure, will do.
Thanks
Will change.
+This is trigger-able by the userspace. It might be better to use
return features;
}
@@ -356,6 +576,77 @@ static u32 ef100_vdpa_get_vendor_id(struct
vdpa_device *vdev)
return EF100_VDPA_VENDOR_ID;
}
+static u8 ef100_vdpa_get_status(struct vdpa_device *vdev)
+{
+ struct ef100_vdpa_nic *vdpa_nic = get_vdpa_nic(vdev);
+ u8 status;
+
+ mutex_lock(&vdpa_nic->lock);
+ status = vdpa_nic->status;
+ mutex_unlock(&vdpa_nic->lock);
+ return status;
+}
+
+static void ef100_vdpa_set_status(struct vdpa_device *vdev, u8 status)
+{
+ struct ef100_vdpa_nic *vdpa_nic = get_vdpa_nic(vdev);
+ u8 new_status;
+ int rc;
+
+ mutex_lock(&vdpa_nic->lock);
+ if (!status) {
+ dev_info(&vdev->dev,
+ "%s: Status received is 0. Device reset
being done\n",
+ __func__);
dev_dbg() instead.
Ok.
+ ef100_reset_vdpa_device(vdpa_nic);Same here.
+ goto unlock_return;
+ }
+ new_status = status & ~vdpa_nic->status;
+ if (new_status == 0) {
+ dev_info(&vdev->dev,
+ "%s: New status same as current status\n",
__func__);
vdpa_state helps to ensure correct status transitions in the
+ goto unlock_return;It might be better to explain the reason we need to track another
+ }
+ if (new_status & VIRTIO_CONFIG_S_FAILED) {
+ ef100_reset_vdpa_device(vdpa_nic);
+ goto unlock_return;
+ }
+
+ if (new_status & VIRTIO_CONFIG_S_ACKNOWLEDGE) {
+ vdpa_nic->status |= VIRTIO_CONFIG_S_ACKNOWLEDGE;
+ new_status &= ~VIRTIO_CONFIG_S_ACKNOWLEDGE;
+ }
+ if (new_status & VIRTIO_CONFIG_S_DRIVER) {
+ vdpa_nic->status |= VIRTIO_CONFIG_S_DRIVER;
+ new_status &= ~VIRTIO_CONFIG_S_DRIVER;
+ }
+ if (new_status & VIRTIO_CONFIG_S_FEATURES_OK) {
+ vdpa_nic->status |= VIRTIO_CONFIG_S_FEATURES_OK;
+ vdpa_nic->vdpa_state = EF100_VDPA_STATE_NEGOTIATED;
state in vdpa_state instead of simply using the device status.
.set_status callback and safe-guards against incorrect/malicious
user-space driver.
Ok, let's document this in the definition of vdpa_state.
I think this can be dealt with an additional SUSPEND state which can be used to avoid any changes to the config space.
Are you referring to the possibility of updating device configuration
+ new_status &= ~VIRTIO_CONFIG_S_FEATURES_OK;Note that the suspension matters for the whole device. It means the
+ }
+ if (new_status & VIRTIO_CONFIG_S_DRIVER_OK &&
+ vdpa_nic->vdpa_state == EF100_VDPA_STATE_NEGOTIATED) {
+ vdpa_nic->status |= VIRTIO_CONFIG_S_DRIVER_OK;
+ rc = start_vdpa_device(vdpa_nic);
+ if (rc) {
+ dev_err(&vdpa_nic->vdpa_dev.dev,
+ "%s: vDPA device failed:%d\n",
__func__, rc);
+ vdpa_nic->status &= ~VIRTIO_CONFIG_S_DRIVER_OK;
+ goto unlock_return;
+ }
+ new_status &= ~VIRTIO_CONFIG_S_DRIVER_OK;
+ }
+ if (new_status) {
+ dev_warn(&vdev->dev,
+ "%s: Mismatch Status: %x & State: %u\n",
+ __func__, new_status, vdpa_nic->vdpa_state);
+ }
+
+unlock_return:
+ mutex_unlock(&vdpa_nic->lock);
+}
+
static size_t ef100_vdpa_get_config_size(struct vdpa_device *vdev)
{
return sizeof(struct virtio_net_config);
@@ -393,6 +684,20 @@ static void ef100_vdpa_set_config(struct
vdpa_device *vdev, unsigned int offset,
vdpa_nic->mac_configured = true;
}
+static int ef100_vdpa_suspend(struct vdpa_device *vdev)
+{
+ struct ef100_vdpa_nic *vdpa_nic = get_vdpa_nic(vdev);
+ int i, rc;
+
+ mutex_lock(&vdpa_nic->lock);
+ for (i = 0; i < (vdpa_nic->max_queue_pairs * 2); i++) {
+ rc = delete_vring(vdpa_nic, i);
config space should not be changed. But the code here only suspends
the vring, is this intende/d?
(eg. MAC address) using .set_config() after suspend operation? Is
there any other user triggered operation that falls in this category?
Updating MAC should be prohibited, one typical use case is the link status.
Could you pls elaborate on the requirement during device reset?
Reset may have the same issue.
I meant ef100_reset_vdpa_device() may suffer from the same issue:
It only reset all the vring but not the config space?
Thanks
[1]
Thanks
https://patchew.org/QEMU/20230213191929.1547497-1-eperezma@xxxxxxxxxx/
+ if (rc)
+ break;
+ }
+ mutex_unlock(&vdpa_nic->lock);
+ return rc;
+}
static void ef100_vdpa_free(struct vdpa_device *vdev)
{
struct ef100_vdpa_nic *vdpa_nic = get_vdpa_nic(vdev);
@@ -428,9 +733,13 @@ const struct vdpa_config_ops
ef100_vdpa_config_ops = {
.get_vq_num_max = ef100_vdpa_get_vq_num_max,
.get_device_id = ef100_vdpa_get_device_id,
.get_vendor_id = ef100_vdpa_get_vendor_id,
+ .get_status = ef100_vdpa_get_status,
+ .set_status = ef100_vdpa_set_status,
+ .reset = ef100_vdpa_reset,
.get_config_size = ef100_vdpa_get_config_size,
.get_config = ef100_vdpa_get_config,
.set_config = ef100_vdpa_set_config,
.get_generation = NULL,
+ .suspend = ef100_vdpa_suspend,
.free = ef100_vdpa_free,
};
--
2.30.1