Re: [PATCH v2 1/4] PCI: s390: Fix use-after-free of PCI resources with per-function hotplug

From: Bjorn Helgaas
Date: Wed Mar 08 2023 - 18:15:01 EST


On Mon, Mar 06, 2023 at 04:10:11PM +0100, Niklas Schnelle wrote:
> On s390 PCI functions may be hotplugged individually even when they
> belong to a multi-function device. In particular on an SR-IOV device VFs
> may be removed and later re-added.
>
> In commit a50297cf8235 ("s390/pci: separate zbus creation from
> scanning") it was missed however that struct pci_bus and struct
> zpci_bus's resource list retained a reference to the PCI function's MMIO
> resources even though those resources are released and freed on
> hot-unplug. These stale resources may subsequently be claimed when the
> PCI function re-appears resulting in use-after-free.
>
> One idea of fixing this use-after-free in s390 specific code that was
> investigated was to simply keep resources around from the moment a PCI
> function first appeared until the whole virtual PCI bus created for
> a multi-function device disappears. The problem with this however is
> that due to the requirement of artificial MMIO addresses (address
> cookies) extra logic is then needed to keep the address cookies
> compatible on re-plug. At the same time the MMIO resources semantically
> belong to the PCI function so tying their lifecycle to the function
> seems more logical.
>
> Instead a simpler approach is to remove the resources of an individually
> hot-unplugged PCI function from the PCI bus's resource list while
> keeping the resources of other PCI functions on the PCI bus untouched.
>
> This is done by introducing pci_bus_remove_resource() to remove an
> individual resource. Similarly the resource also needs to be removed
> from the struct zpci_bus's resource list. It turns out however, that
> there is really no need to add the MMIO resources to the struct
> zpci_bus's resource list at all and instead we can simply use the
> zpci_bar_struct's resource pointer directly.
>
> Fixes: a50297cf8235 ("s390/pci: separate zbus creation from scanning")
> Signed-off-by: Niklas Schnelle <schnelle@xxxxxxxxxxxxx>

Acked-by: Bjorn Helgaas <bhelgaas@xxxxxxxxxx>

The meat of this is mostly in s390, so I think it makes more sense to
merge via that tree. But let me know if you'd rather that I take it.

> ---
> v1 -> v2:
> - Remove return at the end of function returning void
>
> arch/s390/pci/pci.c | 16 ++++++++++------
> arch/s390/pci/pci_bus.c | 12 +++++-------
> arch/s390/pci/pci_bus.h | 3 +--
> drivers/pci/bus.c | 21 +++++++++++++++++++++
> include/linux/pci.h | 1 +
> 5 files changed, 38 insertions(+), 15 deletions(-)
>
> diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
> index ef38b1514c77..e16afacc8fd1 100644
> --- a/arch/s390/pci/pci.c
> +++ b/arch/s390/pci/pci.c
> @@ -544,8 +544,7 @@ static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
> return r;
> }
>
> -int zpci_setup_bus_resources(struct zpci_dev *zdev,
> - struct list_head *resources)
> +int zpci_setup_bus_resources(struct zpci_dev *zdev)
> {
> unsigned long addr, size, flags;
> struct resource *res;
> @@ -581,7 +580,6 @@ int zpci_setup_bus_resources(struct zpci_dev *zdev,
> return -ENOMEM;
> }
> zdev->bars[i].res = res;
> - pci_add_resource(resources, res);
> }
> zdev->has_resources = 1;
>
> @@ -590,17 +588,23 @@ int zpci_setup_bus_resources(struct zpci_dev *zdev,
>
> static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
> {
> + struct resource *res;
> int i;
>
> + pci_lock_rescan_remove();
> for (i = 0; i < PCI_STD_NUM_BARS; i++) {
> - if (!zdev->bars[i].size || !zdev->bars[i].res)
> + res = zdev->bars[i].res;
> + if (!res)
> continue;
>
> + release_resource(res);
> + pci_bus_remove_resource(zdev->zbus->bus, res);
> zpci_free_iomap(zdev, zdev->bars[i].map_idx);
> - release_resource(zdev->bars[i].res);
> - kfree(zdev->bars[i].res);
> + zdev->bars[i].res = NULL;
> + kfree(res);
> }
> zdev->has_resources = 0;
> + pci_unlock_rescan_remove();
> }
>
> int pcibios_device_add(struct pci_dev *pdev)
> diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
> index 6a8da1b742ae..a99926af2b69 100644
> --- a/arch/s390/pci/pci_bus.c
> +++ b/arch/s390/pci/pci_bus.c
> @@ -41,9 +41,7 @@ static int zpci_nb_devices;
> */
> static int zpci_bus_prepare_device(struct zpci_dev *zdev)
> {
> - struct resource_entry *window, *n;
> - struct resource *res;
> - int rc;
> + int rc, i;
>
> if (!zdev_enabled(zdev)) {
> rc = zpci_enable_device(zdev);
> @@ -57,10 +55,10 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev)
> }
>
> if (!zdev->has_resources) {
> - zpci_setup_bus_resources(zdev, &zdev->zbus->resources);
> - resource_list_for_each_entry_safe(window, n, &zdev->zbus->resources) {
> - res = window->res;
> - pci_bus_add_resource(zdev->zbus->bus, res, 0);
> + zpci_setup_bus_resources(zdev);
> + for (i = 0; i < PCI_STD_NUM_BARS; i++) {
> + if (zdev->bars[i].res)
> + pci_bus_add_resource(zdev->zbus->bus, zdev->bars[i].res, 0);
> }
> }
>
> diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
> index e96c9860e064..af9f0ac79a1b 100644
> --- a/arch/s390/pci/pci_bus.h
> +++ b/arch/s390/pci/pci_bus.h
> @@ -30,8 +30,7 @@ static inline void zpci_zdev_get(struct zpci_dev *zdev)
>
> int zpci_alloc_domain(int domain);
> void zpci_free_domain(int domain);
> -int zpci_setup_bus_resources(struct zpci_dev *zdev,
> - struct list_head *resources);
> +int zpci_setup_bus_resources(struct zpci_dev *zdev);
>
> static inline struct zpci_dev *zdev_from_bus(struct pci_bus *bus,
> unsigned int devfn)
> diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
> index 83ae838ceb5f..549c4bd5caec 100644
> --- a/drivers/pci/bus.c
> +++ b/drivers/pci/bus.c
> @@ -76,6 +76,27 @@ struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n)
> }
> EXPORT_SYMBOL_GPL(pci_bus_resource_n);
>
> +void pci_bus_remove_resource(struct pci_bus *bus, struct resource *res)
> +{
> + struct pci_bus_resource *bus_res, *tmp;
> + int i;
> +
> + for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
> + if (bus->resource[i] == res) {
> + bus->resource[i] = NULL;
> + return;
> + }
> + }
> +
> + list_for_each_entry_safe(bus_res, tmp, &bus->resources, list) {
> + if (bus_res->res == res) {
> + list_del(&bus_res->list);
> + kfree(bus_res);
> + return;
> + }
> + }
> +}
> +
> void pci_bus_remove_resources(struct pci_bus *bus)
> {
> int i;
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index fafd8020c6d7..b50e5c79f7e3 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1438,6 +1438,7 @@ void pci_bus_add_resource(struct pci_bus *bus, struct resource *res,
> unsigned int flags);
> struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n);
> void pci_bus_remove_resources(struct pci_bus *bus);
> +void pci_bus_remove_resource(struct pci_bus *bus, struct resource *res);
> int devm_request_pci_bus_resources(struct device *dev,
> struct list_head *resources);
>
> --
> 2.37.2
>