Re: [PATCH v17 5/7] vfio-pci/zdev: Add a device feature for error information

From: Alex Williamson

Date: Tue Jun 02 2026 - 18:34:11 EST


On Tue, 2 Jun 2026 14:38:56 -0700
Farhan Ali <alifm@xxxxxxxxxxxxx> wrote:

> On 6/2/2026 1:32 PM, Alex Williamson wrote:
> > On Wed, 20 May 2026 10:11:11 -0700
> > Farhan Ali <alifm@xxxxxxxxxxxxx> wrote:
> >
> >> For zPCI devices, we have platform-specific error information. The platform
> >> firmware provides this error information to the operating system via an
> >> architecture-specific mechanism. To enable recovery from userspace for
> >> these devices, we want to expose this error information to userspace. Add a
> >> new device feature to expose this information.
> >>
> >> Reviewed-by: Matthew Rosato <mjrosato@xxxxxxxxxxxxx>
> >> Reviewed-by: Niklas Schnelle <schnelle@xxxxxxxxxxxxx>
> >> Signed-off-by: Farhan Ali <alifm@xxxxxxxxxxxxx>
> >> ---
> >> arch/s390/include/asm/pci.h | 2 ++
> >> arch/s390/pci/pci_event.c | 19 ++++++++++++++++
> >> drivers/vfio/pci/vfio_pci_core.c | 2 ++
> >> drivers/vfio/pci/vfio_pci_priv.h | 9 ++++++++
> >> drivers/vfio/pci/vfio_pci_zdev.c | 39 ++++++++++++++++++++++++++++++++
> >> include/uapi/linux/vfio.h | 30 ++++++++++++++++++++++++
> >> 6 files changed, 101 insertions(+)
> >>
> >> diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
> >> index 016386f7ef4a..88a125b92bdd 100644
> >> --- a/arch/s390/include/asm/pci.h
> >> +++ b/arch/s390/include/asm/pci.h
> >> @@ -364,6 +364,8 @@ int zpci_clear_error_state(struct zpci_dev *zdev);
> >> int zpci_reset_load_store_blocked(struct zpci_dev *zdev);
> >> void zpci_start_mediated_recovery(struct zpci_dev *zdev);
> >> void zpci_stop_mediated_recovery(struct zpci_dev *zdev);
> >> +int zpci_get_pending_error(struct zpci_dev *zdev,
> >> + struct zpci_ccdf_err *ccdf);
> >>
> >> #ifdef CONFIG_NUMA
> >>
> >> diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
> >> index cf2ffa21ab8c..db1b44baf8fa 100644
> >> --- a/arch/s390/pci/pci_event.c
> >> +++ b/arch/s390/pci/pci_event.c
> >> @@ -75,6 +75,25 @@ static int zpci_store_pci_error(struct pci_dev *pdev,
> >> return 0;
> >> }
> >>
> >> +int zpci_get_pending_error(struct zpci_dev *zdev,
> >> + struct zpci_ccdf_err *ccdf)
> >> +{
> >> + int head = 0;
> >> +
> >> + guard(mutex)(&zdev->pending_errs_lock);
> >> +
> >> + if (!zdev->pending_errs.count)
> >> + return -ENOMSG;
> >> +
> >> + head = zdev->pending_errs.head % ZPCI_ERR_PENDING_MAX;
> >> + memcpy(ccdf, &zdev->pending_errs.err[head],
> >> + sizeof(struct zpci_ccdf_err));
> >> + zdev->pending_errs.head++;
> >> + zdev->pending_errs.count--;
> >> + return 0;
> >> +}
> >> +EXPORT_SYMBOL_GPL(zpci_get_pending_error);
> >> +
> >> void zpci_start_mediated_recovery(struct zpci_dev *zdev)
> >> {
> >> guard(mutex)(&zdev->pending_errs_lock);
> >> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> >> index 050e7542952e..27642f10fe97 100644
> >> --- a/drivers/vfio/pci/vfio_pci_core.c
> >> +++ b/drivers/vfio/pci/vfio_pci_core.c
> >> @@ -1569,6 +1569,8 @@ int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
> >> return vfio_pci_core_feature_token(vdev, flags, arg, argsz);
> >> case VFIO_DEVICE_FEATURE_DMA_BUF:
> >> return vfio_pci_core_feature_dma_buf(vdev, flags, arg, argsz);
> >> + case VFIO_DEVICE_FEATURE_ZPCI_ERROR:
> >> + return vfio_pci_zdev_feature_err(device, flags, arg, argsz);
> >> default:
> >> return -ENOTTY;
> >> }
> >> diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
> >> index fca9d0dfac90..4e7162234a2e 100644
> >> --- a/drivers/vfio/pci/vfio_pci_priv.h
> >> +++ b/drivers/vfio/pci/vfio_pci_priv.h
> >> @@ -93,6 +93,8 @@ int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
> >> struct vfio_info_cap *caps);
> >> int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev);
> >> void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev);
> >> +int vfio_pci_zdev_feature_err(struct vfio_device *device, u32 flags,
> >> + void __user *arg, size_t argsz);
> >> #else
> >> static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
> >> struct vfio_info_cap *caps)
> >> @@ -107,6 +109,13 @@ static inline int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev)
> >>
> >> static inline void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev)
> >> {}
> >> +
> >> +static inline int vfio_pci_zdev_feature_err(struct vfio_device *device,
> >> + u32 flags, void __user *arg,
> >> + size_t argsz)
> >> +{
> >> + return -ENOTTY;
> >> +}
> >> #endif
> >>
> >> static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
> >> diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c
> >> index 78a28db00c6d..cc148123a97b 100644
> >> --- a/drivers/vfio/pci/vfio_pci_zdev.c
> >> +++ b/drivers/vfio/pci/vfio_pci_zdev.c
> >> @@ -141,6 +141,45 @@ int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
> >> return ret;
> >> }
> >>
> >> +int vfio_pci_zdev_feature_err(struct vfio_device *device, u32 flags,
> >> + void __user *arg, size_t argsz)
> >> +{
> >> + struct vfio_device_feature_zpci_err err = {};
> >> + struct vfio_pci_core_device *vdev;
> >> + struct zpci_ccdf_err ccdf = {};
> >> + struct zpci_dev *zdev;
> >> + int ret;
> >> +
> >> + vdev = container_of(device, struct vfio_pci_core_device, vdev);
> >> + zdev = to_zpci(vdev->pdev);
> >> + if (!zdev)
> >> + return -ENODEV;
> >> +
> >> + ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
> >> + sizeof(err));
> >> + if (ret != 1)
> >> + return ret;
> >> +
> >> + ret = zpci_get_pending_error(zdev, &ccdf);
> >> + if (ret)
> >> + return ret;
> >> +
> >> + err.fh = ccdf.fh;
> >> + err.fid = ccdf.fid;
> >> + err.ett = ccdf.ett;
> >> + err.mvn = ccdf.mvn;
> >> + err.dmaas = ccdf.dmaas;
> >> + err.q = ccdf.q;
> >> + err.rw = ccdf.rw;
> >> + err.faddr = ccdf.faddr;
> >> + err.pec = ccdf.pec;
> >> +
> >> + if (copy_to_user(arg, &err, sizeof(err)))
> >> + return -EFAULT;
> >> +
> >> + return 0;
> >> +}
> > There's a concern here that the error is dequeued but we can still fail
> > resulting in a lost error. Is that a sufficiently unlikely scenario to
> > ignore or should it at least be documented?
>
> IMHO this scenario would be unlikely and could be ignored. I think to
> handle this we would need something like a peek() API and only remove
> and update the pending_errs if copy_to_user succeeds. This would be
> adding more complexity for a relatively low possibility of failure.

Or just emit a dev_warn_ratelimited() when copy_to_user() fails after the
error has already been dequeued.

> >> +
> >> int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev)
> >> {
> >> struct zpci_dev *zdev = to_zpci(vdev->pdev);
> >> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> >> index 5de618a3a5ee..3eead86a00ab 100644
> >> --- a/include/uapi/linux/vfio.h
> >> +++ b/include/uapi/linux/vfio.h
> >> @@ -1534,6 +1534,36 @@ struct vfio_device_feature_dma_buf {
> >> */
> >> #define VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2 12
> >>
> >> +/**
> >> + * The VFIO_DEVICE_FEATURE_ZPCI_ERROR feature provides PCI error information to
> >> + * userspace for vfio-pci devices on s390. On s390, PCI error recovery
> >> + * involves platform firmware, and notification to operating systems is done
> >> + * by an architecture-specific mechanism. Exposing this information to
> >> + * userspace allows it to take appropriate actions to handle an
> >> + * error on the device. The ioctl returns -ENOMSG if there are no pending
> >> + * PCI errors.
> >> + */
> >> +
> >> +struct vfio_device_feature_zpci_err {
> >> + __u32 feature_flags; /* Indicate future features */
> >> + __u32 reserved1;
> >> + __u32 fh; /* function handle */
> >> + __u32 fid; /* function id */
> >> + __u32 ett : 4; /* expected table type */
> >> + __u32 mvn : 12; /* MSI vector number */
> >> + __u32 dmaas : 8; /* DMA address space */
> >> + __u32 reserved2 : 6;
> >> + __u32 q : 1; /* event qualifier */
> >> + __u32 rw : 1; /* read/write */
> >> + __u64 faddr; /* failing address */
> >> + __u32 reserved3;
> >> + __u16 reserved4;
> >> + __u16 pec; /* PCI event code */
> >> + __u8 reserved5[28]; /* Allow for future expansion */
> >> +};
> >> +
> >> +#define VFIO_DEVICE_FEATURE_ZPCI_ERROR 13
> >> +
> > There are currently at least 4 different patch series, including this
> > one, that are vying for device feature allocations. This series spans
> > PCI, s390, and vfio, and I'm not entirely sure which maintainer should
> > take it. With Niklas' review on patch 4, should we ask Bjorn to take
> > 1-3 through PCI, share a branch, and I could merge 4-7 through vfio?
> > Thanks,
> >
> > Alex
>
> If it helps I can break the series into PCI only and vfio only patches.
> Please let me know if there is anything I can help with to make the
> merge easier.

Let's do that. Thanks,

Alex