Re: [PATCH v2 25/30] vfio-pci/zdev: wire up zPCI interpretive execution support

From: Pierre Morel
Date: Tue Jan 25 2022 - 08:02:10 EST




On 1/14/22 21:31, Matthew Rosato wrote:
Introduce support for VFIO_DEVICE_FEATURE_ZPCI_INTERP, which is a new
VFIO_DEVICE_FEATURE ioctl. This interface is used to indicate that an
s390x vfio-pci device wishes to enable/disable zPCI interpretive
execution, which allows zPCI instructions to be executed directly by
underlying firmware without KVM involvement.

Signed-off-by: Matthew Rosato <mjrosato@xxxxxxxxxxxxx>
---
arch/s390/include/asm/kvm_pci.h | 1 +
drivers/vfio/pci/vfio_pci_core.c | 2 +
drivers/vfio/pci/vfio_pci_zdev.c | 78 ++++++++++++++++++++++++++++++++
include/linux/vfio_pci_core.h | 10 ++++
include/uapi/linux/vfio.h | 7 +++
include/uapi/linux/vfio_zdev.h | 15 ++++++
6 files changed, 113 insertions(+)

diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h
index 97a90b37c87d..dc00c3f27a00 100644
--- a/arch/s390/include/asm/kvm_pci.h
+++ b/arch/s390/include/asm/kvm_pci.h
@@ -35,6 +35,7 @@ struct kvm_zdev {
struct kvm_zdev_ioat ioat;
struct zpci_fib fib;
struct notifier_block nb;
+ bool interp;

NIT: s/interp/interpretation/ ?

};
int kvm_s390_pci_dev_open(struct zpci_dev *zdev);
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index fc57d4d0abbe..2b2d64a2190c 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1172,6 +1172,8 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
mutex_unlock(&vdev->vf_token->lock);
return 0;
+ case VFIO_DEVICE_FEATURE_ZPCI_INTERP:
+ return vfio_pci_zdev_feat_interp(vdev, feature, arg);
default:
return -ENOTTY;
}
diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c
index 5c2bddc57b39..4339f48b98bc 100644
--- a/drivers/vfio/pci/vfio_pci_zdev.c
+++ b/drivers/vfio/pci/vfio_pci_zdev.c
@@ -54,6 +54,10 @@ static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps)
.version = zdev->version
};
+ /* Some values are different for interpreted devices */
+ if (zdev->kzdev && zdev->kzdev->interp)
+ cap.maxstbl = zdev->maxstbl;
+
return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
}
@@ -138,6 +142,72 @@ int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
return ret;
}
+int vfio_pci_zdev_feat_interp(struct vfio_pci_core_device *vdev,
+ struct vfio_device_feature feature,
+ unsigned long arg)
+{
+ struct zpci_dev *zdev = to_zpci(vdev->pdev);
+ struct vfio_device_zpci_interp *data;
+ struct vfio_device_feature *feat;
+ unsigned long minsz;
+ int size, rc;
+
+ if (!zdev || !zdev->kzdev)
+ return -EINVAL;
+
+ /* If PROBE specified, return probe results immediately */
+ if (feature.flags & VFIO_DEVICE_FEATURE_PROBE)
+ return kvm_s390_pci_interp_probe(zdev);
+
+ /* GET and SET are mutually exclusive */
+ if ((feature.flags & VFIO_DEVICE_FEATURE_GET) &&
+ (feature.flags & VFIO_DEVICE_FEATURE_SET))
+ return -EINVAL;

Isn't this GET/SET exclusivity check already done in the VFIO core?

+
+ size = sizeof(*feat) + sizeof(*data);
+ feat = kzalloc(size, GFP_KERNEL);
+ if (!feat)
+ return -ENOMEM;
+
+ data = (struct vfio_device_zpci_interp *)&feat->data;
+ minsz = offsetofend(struct vfio_device_feature, flags);
+
+ if (feature.argsz < minsz + sizeof(*data))
+ return -EINVAL;
+
+ /* Get the rest of the payload for GET/SET */
+ rc = copy_from_user(data, (void __user *)(arg + minsz),
+ sizeof(*data));
+ if (rc)
+ rc = -EINVAL;
+
+ if (feature.flags & VFIO_DEVICE_FEATURE_GET) {
+ if (zdev->gd != 0)
+ data->flags = VFIO_DEVICE_ZPCI_FLAG_INTERP;
+ else
+ data->flags = 0;
+ data->fh = zdev->fh;
+ /* userspace is using host fh, give interpreted clp values */
+ zdev->kzdev->interp = true;
+
+ if (copy_to_user((void __user *)arg, feat, size))
+ rc = -EFAULT;
+ } else if (feature.flags & VFIO_DEVICE_FEATURE_SET) {
+ if (data->flags == VFIO_DEVICE_ZPCI_FLAG_INTERP)
+ rc = kvm_s390_pci_interp_enable(zdev);
+ else if (data->flags == 0)
+ rc = kvm_s390_pci_interp_disable(zdev);
+ else
+ rc = -EINVAL;
+ } else {
+ /* Neither GET nor SET were specified */
+ rc = -EINVAL;
+ }
+
+ kfree(feat);
+ return rc;
+}
+
static int vfio_pci_zdev_group_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
@@ -164,6 +234,7 @@ void vfio_pci_zdev_open(struct vfio_pci_core_device *vdev)
return;
zdev->kzdev->nb.notifier_call = vfio_pci_zdev_group_notifier;
+ zdev->kzdev->interp = false;
if (vfio_register_notifier(vdev->vdev.dev, VFIO_GROUP_NOTIFY,
&events, &zdev->kzdev->nb))
@@ -180,5 +251,12 @@ void vfio_pci_zdev_release(struct vfio_pci_core_device *vdev)
vfio_unregister_notifier(vdev->vdev.dev, VFIO_GROUP_NOTIFY,
&zdev->kzdev->nb);
+ /*
+ * If the device was using interpretation, don't trust that userspace
+ * did the appropriate cleanup
+ */
+ if (zdev->gd != 0)
+ kvm_s390_pci_interp_disable(zdev);
+
kvm_s390_pci_dev_release(zdev);
}
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 05287f8ac855..0db2b1051931 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -198,6 +198,9 @@ static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev)
#ifdef CONFIG_VFIO_PCI_ZDEV
extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
struct vfio_info_cap *caps);
+int vfio_pci_zdev_feat_interp(struct vfio_pci_core_device *vdev,
+ struct vfio_device_feature feature,
+ unsigned long arg);
void vfio_pci_zdev_open(struct vfio_pci_core_device *vdev);
void vfio_pci_zdev_release(struct vfio_pci_core_device *vdev);
#else
@@ -207,6 +210,13 @@ static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
return -ENODEV;
}
+static inline int vfio_pci_zdev_feat_interp(struct vfio_pci_core_device *vdev,
+ struct vfio_device_feature feature,
+ unsigned long arg)
+{
+ return -ENOTTY;
+}
+
static inline void vfio_pci_zdev_open(struct vfio_pci_core_device *vdev)
{
}
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index ef33ea002b0b..b9a75485b8e7 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -1002,6 +1002,13 @@ struct vfio_device_feature {
*/
#define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0)
+/*
+ * Provide support for enabling interpretation of zPCI instructions. This
+ * feature is only valid for s390x PCI devices. Data provided when setting
+ * and getting this feature is further described in vfio_zdev.h
+ */
+#define VFIO_DEVICE_FEATURE_ZPCI_INTERP (1)
+
/* -------- API for Type1 VFIO IOMMU -------- */
/**
diff --git a/include/uapi/linux/vfio_zdev.h b/include/uapi/linux/vfio_zdev.h
index b4309397b6b2..575f0410dc66 100644
--- a/include/uapi/linux/vfio_zdev.h
+++ b/include/uapi/linux/vfio_zdev.h
@@ -75,4 +75,19 @@ struct vfio_device_info_cap_zpci_pfip {
__u8 pfip[];
};
+/**
+ * VFIO_DEVICE_FEATURE_ZPCI_INTERP
+ *
+ * This feature is used for enabling zPCI instruction interpretation for a
+ * device. No data is provided when setting this feature. When getting
+ * this feature, the following structure is provided which details whether
+ * or not interpretation is active and provides the guest with host device
+ * information necessary to enable interpretation.
+ */
+struct vfio_device_zpci_interp {
+ __u64 flags;
+#define VFIO_DEVICE_ZPCI_FLAG_INTERP 1
+ __u32 fh; /* Host device function handle */
+};
+
#endif


Otherwise LGTM

--
Pierre Morel
IBM Lab Boeblingen