Re: [PATCH v12 5/7] vfio-pci/zdev: Add a device feature for error information
From: Farhan Ali
Date: Tue Apr 07 2026 - 14:15:34 EST
On 4/7/2026 8:53 AM, Alex Williamson wrote:
On Mon, 30 Mar 2026 10:40:09 -0700
Farhan Ali <alifm@xxxxxxxxxxxxx> wrote:
For zPCI devices, we have platform specific error information. The platformInconsistent that this isn't a helper exported from the previous patch.
firmware provides this error information to the operating system in an
architecture specific mechanism. To enable recovery from userspace for
these devices, we want to expose this error information to userspace. Add a
new device feature to expose this information.
Signed-off-by: Farhan Ali <alifm@xxxxxxxxxxxxx>
---
drivers/vfio/pci/vfio_pci_core.c | 2 ++
drivers/vfio/pci/vfio_pci_priv.h | 9 ++++++++
drivers/vfio/pci/vfio_pci_zdev.c | 36 ++++++++++++++++++++++++++++++++
include/uapi/linux/vfio.h | 18 ++++++++++++++++
4 files changed, 65 insertions(+)
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index d43745fe4c84..bbdb625e35ef 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1534,6 +1534,8 @@ int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
return vfio_pci_core_feature_token(vdev, flags, arg, argsz);
case VFIO_DEVICE_FEATURE_DMA_BUF:
return vfio_pci_core_feature_dma_buf(vdev, flags, arg, argsz);
+ case VFIO_DEVICE_FEATURE_ZPCI_ERROR:
+ return vfio_pci_zdev_feature_err(device, flags, arg, argsz);
default:
return -ENOTTY;
}
diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
index 27ac280f00b9..647a9e8f348d 100644
--- a/drivers/vfio/pci/vfio_pci_priv.h
+++ b/drivers/vfio/pci/vfio_pci_priv.h
@@ -89,6 +89,8 @@ int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
struct vfio_info_cap *caps);
int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev);
void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev);
+int vfio_pci_zdev_feature_err(struct vfio_device *device, u32 flags,
+ void __user *arg, size_t argsz);
#else
static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
struct vfio_info_cap *caps)
@@ -103,6 +105,13 @@ static inline int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev)
static inline void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev)
{}
+
+static inline int vfio_pci_zdev_feature_err(struct vfio_device *device,
+ u32 flags, void __user *arg,
+ size_t argsz)
+{
+ return -ENOTTY;
+}
#endif
static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c
index 0658095ac5b1..0a8663879eee 100644
--- a/drivers/vfio/pci/vfio_pci_zdev.c
+++ b/drivers/vfio/pci/vfio_pci_zdev.c
@@ -141,6 +141,42 @@ int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
return ret;
}
+int vfio_pci_zdev_feature_err(struct vfio_device *device, u32 flags,
+ void __user *arg, size_t argsz)
+{
+ struct vfio_device_feature_zpci_err err = {};
+ struct vfio_pci_core_device *vdev;
+ struct zpci_dev *zdev;
+ int head = 0;
+ int ret;
+
+ vdev = container_of(device, struct vfio_pci_core_device, vdev);
+ zdev = to_zpci(vdev->pdev);
+ if (!zdev)
+ return -ENODEV;
+
+ ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
+ sizeof(err));
+ if (ret != 1)
+ return ret;
+
+ mutex_lock(&zdev->pending_errs_lock);
+ if (zdev->pending_errs.count) {
+ head = zdev->pending_errs.head % ZPCI_ERR_PENDING_MAX;
+ err.pec = zdev->pending_errs.err[head].pec;
+ zdev->pending_errs.head++;
+ zdev->pending_errs.count--;
+ err.pending_errors = zdev->pending_errs.count;
+ }
+ mutex_unlock(&zdev->pending_errs_lock);
Do you prefer it to be in a helper function? I can move it to a helper function.
What's the meaning of err.pec = 0? Could this be interpreted as an
error itself?
An err.pec = 0 would indicate there are no pending errors. I don't think anything would prevent userspace from doing a VFIO_DEVICE_FEATURE_GET even if not nudged by an eventfd?
+12 is used in linux-next by the PRECOPY_INFOv2 feature. Thanks,
+ err.version = 1;
+ if (copy_to_user(arg, &err, sizeof(err)))
+ return -EFAULT;
+
+ return 0;
+}
+
int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev)
{
struct zpci_dev *zdev = to_zpci(vdev->pdev);
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index bb7b89330d35..2552eef42000 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -1510,6 +1510,24 @@ struct vfio_device_feature_dma_buf {
struct vfio_region_dma_range dma_ranges[] __counted_by(nr_ranges);
};
+/**
+ * VFIO_DEVICE_FEATURE_ZPCI_ERROR feature provides PCI error information to
+ * userspace for vfio-pci devices on s390x. On s390x PCI error recovery
+ * involves platform firmware and notification to operating system is done
+ * by architecture specific mechanism. Exposing this information to
+ * userspace allows userspace to take appropriate actions to handle an
+ * error on the device. The pending_errors provide any additional errors
+ * pending for the device, and userspace should read until zero.
+ */
+
+struct vfio_device_feature_zpci_err {
+ __u8 version;
+ __u8 pending_errors;
+ __u16 pec;
+};
+
+#define VFIO_DEVICE_FEATURE_ZPCI_ERROR 12
Alex
Ah okay, will update this. I think my next rebase would be on the new kernel (7.1) and that should have the conflicting PCI patches.
Thanks
Farhan
+
/* -------- API for Type1 VFIO IOMMU -------- */
/**