[PATCH 1/1] PCI: Add pci reset quirk for Nvidia GPUs

From: Shanker Donthineni
Date: Fri Apr 23 2021 - 10:55:35 EST


On select platforms, some Nvidia GPU devices do not work with the standard
FLR/SBR reset methods and require a platform-specific quirk to be reset.
For these devices, add a quirk that performs the device reset in firmware.
Platforms that need this quirk expose a firmware reset method (ACPI _RST)
for the affected devices, and the GPUs in these platforms have a unique
device ID range (0x2340-0x237f).

This reset issue will be fixed in the next generation of hardware.

Signed-off-by: Shanker Donthineni <sdonthineni@xxxxxxxxxx>
---
drivers/pci/quirks.c | 54 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 54 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 653660e3ba9e..23fc90d209c2 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3913,6 +3913,59 @@ static int delay_250ms_after_flr(struct pci_dev *dev, int probe)
return 0;
}

+/*
+ * Nvidia GPUs with device IDs 0x2340-0x237f do not work with bus reset;
+ * mark them so that a secondary bus reset (SBR) is never attempted.
+ */
+static void quirk_nvidia_no_bus_reset(struct pci_dev *dev)
+{
+	if ((dev->device & 0xffc0) != 0x2340)
+		return;
+	dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, quirk_nvidia_no_bus_reset);
+
+/*
+ * Device-specific reset for Nvidia GPUs with device IDs 0x2340-0x237f,
+ * which do not work with the standard reset methods. Systems carrying
+ * these GPUs define an ACPI _RST method for the device, so the reset is
+ * delegated to firmware by evaluating _RST.
+ *
+ * Returns 0 if the reset succeeded (or, when probing, is available),
+ * -ENOTTY if this method does not apply, -EINVAL if _RST failed.
+ */
+static int reset_nvidia_gpu_quirk(struct pci_dev *dev, int probe)
+{
+#ifdef CONFIG_ACPI
+	acpi_handle adev;
+
+	/* Devices outside the affected ID range have no special reset. */
+	if ((dev->device & 0xffc0) != 0x2340)
+		return -ENOTTY;
+
+	/* No ACPI handle or no _RST method: report the reset as unavailable. */
+	adev = ACPI_HANDLE(&dev->dev);
+	if (!adev)
+		return -ENOTTY;
+	if (!acpi_has_method(adev, "_RST"))
+		return -ENOTTY;
+
+	/* A probe only asks whether the reset can be done; it can. */
+	if (probe)
+		return 0;
+
+	/* Let firmware carry out the reset by evaluating _RST. */
+	if (ACPI_FAILURE(acpi_evaluate_object(adev, "_RST", NULL, NULL))) {
+		pci_warn(dev, "Failed to reset the device\n");
+		return -EINVAL;
+	}
+	return 0;
+#else
+	/* Without ACPI there is no _RST to evaluate. */
+	return -ENOTTY;
+#endif
+}
+
static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82599_SFP_VF,
reset_intel_82599_sfp_virtfn },
@@ -3924,6 +3977,7 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
{ PCI_VENDOR_ID_INTEL, 0x0953, delay_250ms_after_flr },
{ PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID,
reset_chelsio_generic_dev },
+ { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, reset_nvidia_gpu_quirk },
{ 0 }
};

--
2.17.1