[RFC KERNEL PATCH v8 1/3] xen/pci: Add xen_reset_device_function_state

From: Jiqian Chen
Date: Fri Jun 07 2024 - 03:51:57 EST


When a device has been reset on the dom0 side, the vpci on the Xen side
does not get a notification, so the state cached in vpci becomes
out of date with respect to the real device state.
To solve that problem, add a new function that clears all vpci
device state when a device is reset on the dom0 side.

Also call that function in pcistub_init_device, because when
"pci-assignable-add" is used to assign a passthrough device in
Xen, the passthrough device is reset, the vpci state becomes
out of date, and the device then fails to restore its BAR state.

Signed-off-by: Huang Rui <ray.huang@xxxxxxx>
Signed-off-by: Jiqian Chen <Jiqian.Chen@xxxxxxx>
Reviewed-by: Stefano Stabellini <sstabellini@xxxxxxxxxx>
---
RFC: this needs to wait for the corresponding first patch on the Xen side to be merged.
---
drivers/xen/pci.c | 25 +++++++++++++++++++++++++
drivers/xen/xen-pciback/pci_stub.c | 18 +++++++++++++++---
include/xen/interface/physdev.h | 7 +++++++
include/xen/pci.h | 6 ++++++
4 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index 72d4e3f193af..57093e395982 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -177,6 +177,31 @@ static int xen_remove_device(struct device *dev)
return r;
}

+enum pci_device_state_reset_type { /* values for pci_device_state_reset.reset_type */
+ DEVICE_RESET_FLR, /* 0; the value xen_reset_device_function_state() passes */
+ DEVICE_RESET_COLD, /* 1 */
+ DEVICE_RESET_WARM, /* 2 */
+ DEVICE_RESET_HOT, /* 3 */
+};
+
+/*
+ * Argument structure passed to PHYSDEVOP_pci_device_state_reset.
+ *
+ * @dev:        segment/bus/devfn of the device that has been reset.
+ * @reset_type: one of enum pci_device_state_reset_type.  Kept as a
+ *              fixed-width integer rather than the enum type itself: the
+ *              size of an enum is chosen by the compiler, which is not
+ *              suitable for a structure handed to the hypervisor.
+ *
+ * NOTE(review): the layout must match the Xen-side definition of this
+ * operation - confirm against the corresponding Xen patch.
+ */
+struct pci_device_state_reset {
+	struct physdev_pci_device dev;
+	uint32_t reset_type;
+};
+
+/**
+ * xen_reset_device_function_state - report a reset of a PCI function to Xen
+ * @dev: PCI device; its segment, bus number and devfn identify the function
+ *
+ * Issues PHYSDEVOP_pci_device_state_reset for @dev with the reset type set
+ * to DEVICE_RESET_FLR.
+ *
+ * Return: the value returned by HYPERVISOR_physdev_op().
+ */
+int xen_reset_device_function_state(const struct pci_dev *dev)
+{
+	struct pci_device_state_reset reset_op = {
+		.dev = {
+			.seg = pci_domain_nr(dev->bus),
+			.bus = dev->bus->number,
+			.devfn = dev->devfn,
+		},
+		.reset_type = DEVICE_RESET_FLR,
+	};
+
+	return HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_state_reset,
+				     &reset_op);
+}
+EXPORT_SYMBOL_GPL(xen_reset_device_function_state);
+
static int xen_pci_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index e34b623e4b41..73062e531c34 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -89,6 +89,16 @@ static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
return psdev;
}

+/*
+ * Reset @dev with __pci_reset_function_locked() and then, unless running as
+ * a PV domain, forward the reset to Xen via
+ * xen_reset_device_function_state().
+ *
+ * Returns 0 in a PV domain, otherwise the result of
+ * xen_reset_device_function_state().  The return value of
+ * __pci_reset_function_locked() is not checked.
+ */
+static int pcistub_reset_device_state(struct pci_dev *dev)
+{
+	__pci_reset_function_locked(dev);
+
+	if (xen_pv_domain())
+		return 0;
+
+	return xen_reset_device_function_state(dev);
+}
+
/* Don't call this directly as it's called by pcistub_device_put */
static void pcistub_device_release(struct kref *kref)
{
@@ -107,7 +117,7 @@ static void pcistub_device_release(struct kref *kref)
/* Call the reset function which does not take lock as this
* is called from "unbind" which takes a device_lock mutex.
*/
- __pci_reset_function_locked(dev);
+ pcistub_reset_device_state(dev);
if (dev_data &&
pci_load_and_free_saved_state(dev, &dev_data->pci_saved_state))
dev_info(&dev->dev, "Could not reload PCI state\n");
@@ -284,7 +294,7 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
* (so it's ready for the next domain)
*/
device_lock_assert(&dev->dev);
- __pci_reset_function_locked(dev);
+ pcistub_reset_device_state(dev);

dev_data = pci_get_drvdata(dev);
ret = pci_load_saved_state(dev, dev_data->pci_saved_state);
@@ -420,7 +430,9 @@ static int pcistub_init_device(struct pci_dev *dev)
dev_err(&dev->dev, "Could not store PCI conf saved state!\n");
else {
dev_dbg(&dev->dev, "resetting (FLR, D3, etc) the device\n");
- __pci_reset_function_locked(dev);
+ err = pcistub_reset_device_state(dev);
+ if (err)
+ goto config_release;
pci_restore_state(dev);
}
/* Now disable the device (this also ensures some private device
diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
index a237af867873..b50646c993dd 100644
--- a/include/xen/interface/physdev.h
+++ b/include/xen/interface/physdev.h
@@ -256,6 +256,13 @@ struct physdev_pci_device_add {
*/
#define PHYSDEVOP_prepare_msix 30
#define PHYSDEVOP_release_msix 31
+/*
+ * Notify the hypervisor that a PCI device has been reset, so that any
+ * internally cached state is regenerated. Should be called after any
+ * device reset performed by the hardware domain.
+ */
+#define PHYSDEVOP_pci_device_state_reset 32
+
struct physdev_pci_device {
/* IN */
uint16_t seg;
diff --git a/include/xen/pci.h b/include/xen/pci.h
index b8337cf85fd1..7941809ab729 100644
--- a/include/xen/pci.h
+++ b/include/xen/pci.h
@@ -4,10 +4,16 @@
#define __XEN_PCI_H__

#if defined(CONFIG_XEN_DOM0)
+int xen_reset_device_function_state(const struct pci_dev *dev);
int xen_find_device_domain_owner(struct pci_dev *dev);
int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
int xen_unregister_device_domain_owner(struct pci_dev *dev);
#else
+static inline int xen_reset_device_function_state(const struct pci_dev *dev)
+{
+ return -1; /* stub for builds without CONFIG_XEN_DOM0: always returns -1 */
+}
+
static inline int xen_find_device_domain_owner(struct pci_dev *dev)
{
return -1;
--
2.34.1