Re: [PATCH 1/5] iommu/s390: Make attach succeed even if the device is in error state
From: Matthew Rosato
Date: Fri Oct 28 2022 - 11:55:57 EST
On 10/18/22 10:51 AM, Niklas Schnelle wrote:
> If a zPCI device is in the error state while switching IOMMU domains,
> zpci_register_ioat() will fail and we would end up with the device not
> attached to any domain. In this state, since zdev->dma_table == NULL,
> a reset via zpci_hot_reset_device() would wrongfully re-initialize the
> device for DMA API usage using zpci_dma_init_device(). As automatic
> recovery is currently disabled while attached to an IOMMU domain, this
> only affects slot resets triggered through other means, but it will
> affect automatic recovery once we switch to using dma-iommu.
>
> Additionally, with that switch, common code expects attaching to the
> default domain to always work, so zpci_register_ioat() should only fail
> if there is no chance to recover anyway, e.g. if the device has been
> unplugged.
>
> Improve the robustness of attach by specifically looking at the status
> returned by zpci_mod_fc() to determine if the device is unavailable and,
> in this case, simply ignore the error. Once the device is reset,
> zpci_hot_reset_device() will then correctly set the domain's DMA
> translation tables.
>
> Signed-off-by: Niklas Schnelle <schnelle@xxxxxxxxxxxxx>
Seems reasonable to me.
Reviewed-by: Matthew Rosato <mjrosato@xxxxxxxxxxxxx>
> ---
> arch/s390/include/asm/pci.h | 2 +-
> arch/s390/kvm/pci.c | 6 ++++--
> arch/s390/pci/pci.c | 11 ++++++-----
> arch/s390/pci/pci_dma.c | 3 ++-
> drivers/iommu/s390-iommu.c | 9 +++++++--
> 5 files changed, 20 insertions(+), 11 deletions(-)
>
> diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
> index 15f8714ca9b7..07361e2fd8c5 100644
> --- a/arch/s390/include/asm/pci.h
> +++ b/arch/s390/include/asm/pci.h
> @@ -221,7 +221,7 @@ void zpci_device_reserved(struct zpci_dev *zdev);
> bool zpci_is_device_configured(struct zpci_dev *zdev);
>
> int zpci_hot_reset_device(struct zpci_dev *zdev);
> -int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
> +int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64, u8 *);
> int zpci_unregister_ioat(struct zpci_dev *, u8);
> void zpci_remove_reserved_devices(void);
> void zpci_update_fh(struct zpci_dev *zdev, u32 fh);
> diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
> index c50c1645c0ae..03964c0e1fdf 100644
> --- a/arch/s390/kvm/pci.c
> +++ b/arch/s390/kvm/pci.c
> @@ -434,6 +434,7 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
> static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
> {
> struct zpci_dev *zdev = opaque;
> + u8 status;
> int rc;
>
> if (!zdev)
> @@ -486,7 +487,7 @@ static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
>
> /* Re-register the IOMMU that was already created */
> rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
> - virt_to_phys(zdev->dma_table));
> + virt_to_phys(zdev->dma_table), &status);
> if (rc)
> goto clear_gisa;
>
> @@ -516,6 +517,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque)
> {
> struct zpci_dev *zdev = opaque;
> struct kvm *kvm;
> + u8 status;
>
> if (!zdev)
> return;
> @@ -554,7 +556,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque)
>
> /* Re-register the IOMMU that was already created */
> zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
> - virt_to_phys(zdev->dma_table));
> + virt_to_phys(zdev->dma_table), &status);
>
> out:
> spin_lock(&kvm->arch.kzdev_list_lock);
> diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
> index 73cdc5539384..a703dcd94a68 100644
> --- a/arch/s390/pci/pci.c
> +++ b/arch/s390/pci/pci.c
> @@ -116,20 +116,20 @@ EXPORT_SYMBOL_GPL(pci_proc_domain);
>
> /* Modify PCI: Register I/O address translation parameters */
> int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
> - u64 base, u64 limit, u64 iota)
> + u64 base, u64 limit, u64 iota, u8 *status)
> {
> u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT);
> struct zpci_fib fib = {0};
> - u8 cc, status;
> + u8 cc;
>
> WARN_ON_ONCE(iota & 0x3fff);
> fib.pba = base;
> fib.pal = limit;
> fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
> fib.gd = zdev->gisa;
> - cc = zpci_mod_fc(req, &fib, &status);
> + cc = zpci_mod_fc(req, &fib, status);
> if (cc)
> - zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
> + zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, *status);
> return cc;
> }
> EXPORT_SYMBOL_GPL(zpci_register_ioat);
> @@ -764,6 +764,7 @@ EXPORT_SYMBOL_GPL(zpci_disable_device);
> */
> int zpci_hot_reset_device(struct zpci_dev *zdev)
> {
> + u8 status;
> int rc;
>
> zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh);
> @@ -787,7 +788,7 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
>
> if (zdev->dma_table)
> rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
> - virt_to_phys(zdev->dma_table));
> + virt_to_phys(zdev->dma_table), &status);
> else
> rc = zpci_dma_init_device(zdev);
> if (rc) {
> diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
> index 227cf0a62800..dee825ee7305 100644
> --- a/arch/s390/pci/pci_dma.c
> +++ b/arch/s390/pci/pci_dma.c
> @@ -547,6 +547,7 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
>
> int zpci_dma_init_device(struct zpci_dev *zdev)
> {
> + u8 status;
> int rc;
>
> /*
> @@ -598,7 +599,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
>
> }
> if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
> - virt_to_phys(zdev->dma_table))) {
> + virt_to_phys(zdev->dma_table), &status)) {
> rc = -EIO;
> goto free_bitmap;
> }
> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> index 6c407b61b25a..ee88e717254b 100644
> --- a/drivers/iommu/s390-iommu.c
> +++ b/drivers/iommu/s390-iommu.c
> @@ -98,6 +98,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
> struct s390_domain *s390_domain = to_s390_domain(domain);
> struct zpci_dev *zdev = to_zpci_dev(dev);
> unsigned long flags;
> + u8 status;
> int cc;
>
> if (!zdev)
> @@ -113,8 +114,12 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
> zpci_dma_exit_device(zdev);
>
> cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
> - virt_to_phys(s390_domain->dma_table));
> - if (cc)
> + virt_to_phys(s390_domain->dma_table), &status);
> + /*
> + * If the device is undergoing error recovery the reset code
> + * will re-establish the new domain.
> + */
> + if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
> return -EIO;
> zdev->dma_table = s390_domain->dma_table;
>