Re: [PATCH v2 2/3] PCI: Allow extending VF BAR within original resource boundary

From: Michał Winiarski
Date: Thu Oct 10 2024 - 05:02:33 EST


On Fri, Sep 20, 2024 at 12:07:34PM +0200, Christian König wrote:
> Am 20.09.24 um 00:35 schrieb Michał Winiarski:
> > VF MMIO resource reservation, either created by system firmware and
> > inherited by Linux PCI subsystem or created by the subsystem itself,
> > contains enough space to fit the BAR of all SR-IOV Virtual Functions
> > that can potentially be created (total VFs supported by the device).
> > This can be leveraged when the device is exposing lower than optimal BAR
> > size as a default, allowing access to the entire resource when lower
> > number of VFs are created.
> > It is achieved by dynamically resizing the BAR to largest possible value
> > that allows to fit all newly created VFs within the original resource
> > boundary.
> >
> > Signed-off-by: Michał Winiarski <michal.winiarski@xxxxxxxxx>
> > ---
> > drivers/pci/iov.c | 92 ++++++++++++++++++++++++++++++++++++++++++++-
> > drivers/pci/pci.h | 1 +
> > include/linux/pci.h | 3 ++
> > 3 files changed, 95 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
> > index e8ccd2ae0f024..d88efbfa70e42 100644
> > --- a/drivers/pci/iov.c
> > +++ b/drivers/pci/iov.c
> > @@ -181,6 +181,86 @@ bool pci_iov_memory_decoding_enabled(struct pci_dev *dev)
> > return cmd & PCI_SRIOV_CTRL_MSE;
> > }
> > +static void pci_iov_resource_do_extend(struct pci_dev *dev, int resno, u16 num_vfs)
> > +{
> > + resource_size_t size;
> > + int ret, old, i;
> > + u32 sizes;
> > +
> > + pci_config_pm_runtime_get(dev);
> > +
> > + if (pci_iov_memory_decoding_enabled(dev)) {
> > + ret = -EBUSY;
> > + goto err;
> > + }
> > +
> > + sizes = pci_rebar_get_possible_sizes(dev, resno);
> > + if (!sizes) {
> > + ret = -ENOTSUPP;
> > + goto err;
> > + }
> > +
> > + old = pci_rebar_get_current_size(dev, resno);
> > + if (old < 0) {
> > + ret = old;
> > + goto err;
> > + }
> > +
> > + while (sizes > 0) {
> > + i = __fls(sizes);
> > + size = pci_rebar_size_to_bytes(i);
> > + if (size * num_vfs <= pci_resource_len(dev, resno)) {
> > + if (i != old) {
> > + ret = pci_rebar_set_size(dev, resno, size);
> > + if (ret)
> > + goto err;
> > +
> > + pci_iov_resource_set_size(dev, resno, size);
> > + pci_iov_update_resource(dev, resno);
> > + }
> > + break;
> > + }
> > + sizes &= ~BIT(i);
> > + }
> > +
> > + pci_config_pm_runtime_put(dev);
> > +
> > + return;
> > +
> > +err:
> > + dev_WARN(&dev->dev, "Failed to extend %s: %d\n",
> > + pci_resource_name(dev, resno), ret);
> > +
> > + pci_config_pm_runtime_put(dev);
> > +}
> > +
> > +static void pci_iov_resource_do_restore(struct pci_dev *dev, int resno)
> > +{
> > + if (dev->sriov->rebar_extend[resno - PCI_IOV_RESOURCES])
> > + pci_iov_resource_do_extend(dev, resno, dev->sriov->total_VFs);
> > +}
> > +
> > +int pci_iov_resource_extend(struct pci_dev *dev, int resno, bool enable)
> > +{
> > + if (!pci_resource_is_iov(dev, resno)) {
> > + dev_WARN(&dev->dev, "%s is not an IOV resource\n",
> > + pci_resource_name(dev, resno));
> > +
> > + return -ENODEV;
> > + }
> > +
> > + if (!pci_rebar_get_possible_sizes(dev, resno))
> > + return -ENOTSUPP;
> > +
> > + if (!enable)
> > + pci_iov_resource_do_restore(dev, resno);
> > +
> > + dev->sriov->rebar_extend[resno - PCI_IOV_RESOURCES] = enable;
> > +
> > + return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(pci_iov_resource_extend);
> > +
> > static void pci_read_vf_config_common(struct pci_dev *virtfn)
> > {
> > struct pci_dev *physfn = virtfn->physfn;
> > @@ -445,7 +525,7 @@ static ssize_t sriov_numvfs_store(struct device *dev,
> > const char *buf, size_t count)
> > {
> > struct pci_dev *pdev = to_pci_dev(dev);
> > - int ret = 0;
> > + int i, ret = 0;
> > u16 num_vfs;
> > if (kstrtou16(buf, 0, &num_vfs) < 0)
> > @@ -487,6 +567,11 @@ static ssize_t sriov_numvfs_store(struct device *dev,
> > goto exit;
> > }
> > + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
> > + if (pdev->sriov->rebar_extend[i])
> > + pci_iov_resource_do_extend(pdev, i + PCI_IOV_RESOURCES, num_vfs);
> > + }
> > +
>
> That sounds like a really bad idea to me.
>
> Basically the suggestion is here that the PCI subsystem should silently
> extend and shrink the VF BARs when the number of VFs change?

Why do you think it's a bad idea? Everything is under PCI subsystem
control and the driver in charge has to explicitly opt-in to this
behavior on a per-BAR basis.

> Bjorn has the last word on that but I would say that instead the driver
> owning the PCIe device as hypervisor should resize the VF BARs to a desired
> size and that in turn restricts the number of VFs you can enable.

Then the PCI subsystem would silently change the driver_max_VFs (or new
variable, as driver_max_VFs is under PF control, so it's either new var
or checking VF BAR size in pci_sriov_set_totalvfs).
It also means that we have to do the maths to calculate the new VF limit
in both PCI subsystem and the caller.

We can go this route as well - I just think it's cleaner to keep this
all under PCI subsystem control.

I'll keep the current behavior in v3, but I'm open to changing it.

Thanks,
-Michał

>
> Regards,
> Christian.
>
> > ret = pdev->driver->sriov_configure(pdev, num_vfs);
> > if (ret < 0)
> > goto exit;
> > @@ -881,8 +966,13 @@ static int sriov_init(struct pci_dev *dev, int pos)
> > static void sriov_release(struct pci_dev *dev)
> > {
> > + int i;
> > +
> > BUG_ON(dev->sriov->num_VFs);
> > + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
> > + pci_iov_resource_do_restore(dev, i + PCI_IOV_RESOURCES);
> > +
> > if (dev != dev->sriov->dev)
> > pci_dev_put(dev->sriov->dev);
> > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> > index e763b3fd4c7a2..47ed2633232aa 100644
> > --- a/drivers/pci/pci.h
> > +++ b/drivers/pci/pci.h
> > @@ -385,6 +385,7 @@ struct pci_sriov {
> > u16 subsystem_vendor; /* VF subsystem vendor */
> > u16 subsystem_device; /* VF subsystem device */
> > resource_size_t barsz[PCI_SRIOV_NUM_BARS]; /* VF BAR size */
> > + bool rebar_extend[PCI_SRIOV_NUM_BARS]; /* Resize VF BAR */
> > bool drivers_autoprobe; /* Auto probing of VFs by driver */
> > };
> > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > index 4cf89a4b4cbcf..c007119da7b3d 100644
> > --- a/include/linux/pci.h
> > +++ b/include/linux/pci.h
> > @@ -2364,6 +2364,7 @@ int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs);
> > int pci_sriov_get_totalvfs(struct pci_dev *dev);
> > int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn);
> > resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno);
> > +int pci_iov_resource_extend(struct pci_dev *dev, int resno, bool enable);
> > void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe);
> > /* Arch may override these (weak) */
> > @@ -2416,6 +2417,8 @@ static inline int pci_sriov_get_totalvfs(struct pci_dev *dev)
> > #define pci_sriov_configure_simple NULL
> > static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno)
> > { return 0; }
> > +static inline void pci_iov_resource_extend(struct pci_dev *dev, int resno, bool enable)
> > +{ return -ENODEV; }
> > static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe) { }
> > #endif
>