Re: [PATCH V2] xen/virtio: Handle PCI devices which Host controller is described in DT

From: Oleksandr Tyshchenko
Date: Thu Oct 20 2022 - 10:12:58 EST



On 20.10.22 11:24, Xenia Ragiadakou wrote:
> On 10/19/22 22:41, Oleksandr Tyshchenko wrote:
>
> Hi Oleksandr


Hello Xenia


>
>>
>> On 19.10.22 11:47, Xenia Ragiadakou wrote:
>>
>> Hello Xenia
>>
>>> On 10/19/22 03:58, Stefano Stabellini wrote:
>>>> On Sat, 15 Oct 2022, Oleksandr Tyshchenko wrote:
>>>>> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@xxxxxxxx>
>>>>>
>>>>> Use the same "xen-grant-dma" device concept for the PCI devices
>>>>> behind device-tree based PCI Host controller, but with one
>>>>> modification.
>>>>> Unlike for platform devices, we cannot use generic IOMMU bindings
>>>>> (iommus property), as we need to support more flexible configuration.
>>>>> The problem is that PCI devices under the single PCI Host controller
>>>>> may have the backends running in different Xen domains and thus have
>>>>> different endpoints ID (backend domains ID).
>>>>>
>>>>> So use generic PCI-IOMMU bindings instead (iommu-map/iommu-map-mask
>>>>> properties) which allows us to describe relationship between PCI
>>>>> devices and backend domains ID properly.
>>>>>
>>>>> Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@xxxxxxxx>
>>>>
>>>> Now that I understood the approach and the reasons for it, I can
>>>> review
>>>> the patch :-)
>>>>
>>>> Please add an example of the bindings in the commit message.
>>>>
>>>>
>>>>> ---
>>>>> Slightly RFC. This is needed to support Xen grant mappings for
>>>>> virtio-pci devices
>>>>> on Arm at some point in the future. The Xen toolstack side is not
>>>>> completely ready yet.
>>>>> Here, for PCI devices we use more flexible way to pass backend domid
>>>>> to the guest
>>>>> than for platform devices.
>>>>>
>>>>> Changes V1 -> V2:
>>>>>      - update commit description
>>>>>      - rebase
>>>>>      - rework to use generic PCI-IOMMU bindings instead of generic
>>>>> IOMMU bindings
>>>>>
>>>>> Previous discussion is at:
>>>>> https://urldefense.com/v3/__https://lore.kernel.org/xen-devel/20221006174804.2003029-1-olekstysh@xxxxxxxxx/__;!!GF_29dbcQIUBPA!3-vq7Edm3XfKtD5cnNjnOzDQvuo_XrhJ73yH-nPfqOkGGU0IjLG7R7MR_nAJCAPeOutHRLT44wKYwQwz3SauACie_ZAy$
>>>>>
>>>>> [lore[.]kernel[.]org]
>>>>>
>>>>> Based on:
>>>>> https://urldefense.com/v3/__https://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git/log/?h=for-linus-6.1__;!!GF_29dbcQIUBPA!3-vq7Edm3XfKtD5cnNjnOzDQvuo_XrhJ73yH-nPfqOkGGU0IjLG7R7MR_nAJCAPeOutHRLT44wKYwQwz3SauAEnMDHAq$
>>>>>
>>>>> [git[.]kernel[.]org]
>>>>> ---
>>>>>    drivers/xen/grant-dma-ops.c | 87
>>>>> ++++++++++++++++++++++++++++++++-----
>>>>>    1 file changed, 76 insertions(+), 11 deletions(-)
>>>>>
>>>>> diff --git a/drivers/xen/grant-dma-ops.c
>>>>> b/drivers/xen/grant-dma-ops.c
>>>>> index daa525df7bdc..b79d9d6ce154 100644
>>>>> --- a/drivers/xen/grant-dma-ops.c
>>>>> +++ b/drivers/xen/grant-dma-ops.c
>>>>> @@ -10,6 +10,7 @@
>>>>>    #include <linux/module.h>
>>>>>    #include <linux/dma-map-ops.h>
>>>>>    #include <linux/of.h>
>>>>> +#include <linux/pci.h>
>>>>>    #include <linux/pfn.h>
>>>>>    #include <linux/xarray.h>
>>>>>    #include <linux/virtio_anchor.h>
>>>>> @@ -292,12 +293,55 @@ static const struct dma_map_ops
>>>>> xen_grant_dma_ops = {
>>>>>        .dma_supported = xen_grant_dma_supported,
>>>>>    };
>>>>>    +static struct device_node *xen_dt_get_pci_host_node(struct device
>>>>> *dev)
>>>>> +{
>>>>> +    struct pci_dev *pdev = to_pci_dev(dev);
>>>>> +    struct pci_bus *bus = pdev->bus;
>>>>> +
>>>>> +    /* Walk up to the root bus to look for PCI Host controller */
>>>>> +    while (!pci_is_root_bus(bus))
>>>>> +        bus = bus->parent;
>>>>> +
>>>>> +    return of_node_get(bus->bridge->parent->of_node);
>>>>> +}
>>>>
>>>> It seems silly that we need to walk the hierachy that way, but I
>>>> couldn't find another way to do it
>>>>
>>>>
>>>>> +static struct device_node *xen_dt_get_node(struct device *dev)
>>>>> +{
>>>>> +    if (dev_is_pci(dev))
>>>>> +        return xen_dt_get_pci_host_node(dev);
>>>>> +
>>>>> +    return of_node_get(dev->of_node);
>>>>> +}
>>>>> +
>>>>> +static int xen_dt_map_id(struct device *dev, struct device_node
>>>>> **iommu_np,
>>>>> +             u32 *sid)
>>>>> +{
>>>>> +    struct pci_dev *pdev = to_pci_dev(dev);
>>>>> +    u32 rid = PCI_DEVID(pdev->bus->number, pdev->devfn);
>>>>> +    struct device_node *host_np;
>>>>> +    int ret;
>>>>> +
>>>>> +    host_np = xen_dt_get_pci_host_node(dev);
>>>>> +    if (!host_np)
>>>>> +        return -ENODEV;
>>>>> +
>>>>> +    ret = of_map_id(host_np, rid, "iommu-map", "iommu-map-mask",
>>>>> iommu_np, sid);
>>>>> +    of_node_put(host_np);
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>>    static bool xen_is_dt_grant_dma_device(struct device *dev)
>>>>>    {
>>>>> -    struct device_node *iommu_np;
>>>>> +    struct device_node *iommu_np = NULL;
>>>>>        bool has_iommu;
>>>>>    -    iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
>>>>> +    if (dev_is_pci(dev)) {
>>>>> +        if (xen_dt_map_id(dev, &iommu_np, NULL))
>>>>> +            return false;
>>>>> +    } else
>>>>> +        iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
>>>>> +
>>>>>        has_iommu = iommu_np &&
>>>>>                of_device_is_compatible(iommu_np, "xen,grant-dma");
>>>>>        of_node_put(iommu_np);
>>>>> @@ -307,9 +351,17 @@ static bool xen_is_dt_grant_dma_device(struct
>>>>> device *dev)
>>>>>      bool xen_is_grant_dma_device(struct device *dev)
>>>>>    {
>>>>> +    struct device_node *np;
>>>>> +
>>>>>        /* XXX Handle only DT devices for now */
>>>>> -    if (dev->of_node)
>>>>> -        return xen_is_dt_grant_dma_device(dev);
>>>>> +    np = xen_dt_get_node(dev);
>>>>> +    if (np) {
>>>>> +        bool ret;
>>>>> +
>>>>> +        ret = xen_is_dt_grant_dma_device(dev);
>>>>> +        of_node_put(np);
>>>>> +        return ret;
>>>>> +    }
>>>>
>>>> We don't need to walk the PCI hierachy twice. Maybe we can add the
>>>> of_node check directly to xen_is_dt_grant_dma_device?
>>>>
>>>
>>> I think in general we could pass directly the host bridge device if
>>> dev_is_pci(dev) (which can be retrieved with
>>> pci_get_host_bridge_device(to_pci_dev(dev), and after done with it
>>> pci_put_host_bridge_device(phb)).
>>> So that, xen_is_dt_grant_dma_device() and
>>> xen_dt_grant_init_backend_domid() won't need to discover it themselves.
>>> This will simplify the code.
>>
>>
>> Good point. I have some remark. Can we use pci_find_host_bridge()
>> instead? This way we don't have to add #include "../pci/pci.h", and have
>> to drop reference afterwards.
>>
>> With that xen_dt_get_pci_host_node() will became the following:
>>
>>
>> static struct device_node *xen_dt_get_pci_host_node(struct device *dev)
>> {
>>       struct pci_host_bridge *bridge =
>> pci_find_host_bridge(to_pci_dev(dev)->bus);
>>
>>       return of_node_get(bridge->dev.parent->of_node);
>> }
>>
>
> You are right. I prefer your version instead of the above.


ok, thanks


>
>
>>
>> With Stefano's suggestion, we won't walk the PCI hierarchy twice when
>> executing xen_is_grant_dma_device() for PCI device:
>>
>> xen_is_grant_dma_device() -> xen_is_dt_grant_dma_device() ->
>> xen_dt_map_id() -> xen_dt_get_pci_host_node()
>>
>>
>> What do you think?
>>
>
> I was thinking passing the device_node along with the device in the
> function arguments. More specifically, of doing this (not tested, just
> an idea):
>
> bool xen_is_grant_dma_device(struct device *dev)
> {
>     struct device_node *np;
>     bool has_iommu = false;
>
>     /* XXX Handle only DT devices for now */
>     np = xen_dt_get_node(dev);
>     if (np)
>         has_iommu = xen_is_dt_grant_dma_device(dev, np);
>     of_node_put(np);
>     return has_iommu;
> }
>
> static bool xen_is_dt_grant_dma_device(struct device *dev,
>                                        struct device_node *np)
> {
>     struct device_node *iommu_np = NULL;
>     bool has_iommu;
>
>     if (dev_is_pci(dev)) {
>         struct pci_dev *pdev = to_pci_dev(dev);
>     u32 id = PCI_DEVID(pdev->bus->number, pdev->devfn);
>         of_map_id(np, id, "iommu-map", "iommu-map-mask", &iommu_np,
> NULL);
>     } else {
>         iommu_np = of_parse_phandle(np, "iommus", 0);
>     }
>
>     has_iommu = iommu_np && of_device_is_compatible(iommu_np,
> "xen,grant-dma");
>     of_node_put(iommu_np);
>
>     return has_iommu;
> }


I got it.

For V3, xen_is_grant_dma_device() won't call xen_dt_get_node(), but will
call xen_is_dt_grant_dma_device() directly.

/*
 * Check whether the device tree describes @dev as a Xen grant DMA device.
 *
 * For a PCI device the candidate node is obtained via xen_dt_map_id();
 * for any other device it is the first phandle of the "iommus" property
 * of the device's own node. A device with neither source is reported as
 * not being a grant DMA device.
 *
 * Returns true only if a node was found and it is compatible with
 * "xen,grant-dma".
 */
static bool xen_is_dt_grant_dma_device(struct device *dev)
{
    struct device_node *np = NULL;
    bool is_grant_dma = false;

    if (dev_is_pci(dev)) {
        /* A failed ID translation means there is nothing to check. */
        if (xen_dt_map_id(dev, &np, NULL))
            return false;
    } else {
        /* Non-PCI devices without a DT node cannot be matched. */
        if (!dev->of_node)
            return false;

        np = of_parse_phandle(dev->of_node, "iommus", 0);
    }

    if (np && of_device_is_compatible(np, "xen,grant-dma"))
        is_grant_dma = true;

    /* Drop the node reference on every path that reaches this point. */
    of_node_put(np);

    return is_grant_dma;
}

/*
 * Tell whether @dev is a Xen grant DMA device.
 *
 * This is currently a thin wrapper: the result comes solely from the
 * device-tree based check in xen_is_dt_grant_dma_device().
 */
bool xen_is_grant_dma_device(struct device *dev)
{
    /* XXX Handle only DT devices for now */
    return xen_is_dt_grant_dma_device(dev);
}



>
> I 'm wondering ... is it possible for the host bridge device node to
> have the iommus property set? meaning that all of its pci devs will
> have the same backend?

Good question. I think it is possible... This is technically what V1 is
doing.


Are you asking because you would like to support "iommus" for PCI devices
as well, to describe the use-case where all PCI devices have the same
endpoint ID (backend ID)?
If yes, I think this could still be described by the "iommu-map" property,
something like the following (if we don't want to describe the mapping for
each PCI device one-by-one).

iommu-map = <0x0 &iommu X 0x1>;

iommu-map-mask = <0x0>;

where the X is backend ID.


It feels to me that it should be written down somewhere that for
platform devices we expect "iommus" and for PCI devices we expect
"iommu-map/iommu-map-mask" to be present.



>
>
>>>
>>>>
>>>>>        return false;
>>>>>    }
>>>>> @@ -325,12 +377,19 @@ bool xen_virtio_mem_acc(struct virtio_device
>>>>> *dev)
>>>>>    static int xen_dt_grant_init_backend_domid(struct device *dev,
>>>>>                           struct xen_grant_dma_data *data)
>>>>>    {
>>>>> -    struct of_phandle_args iommu_spec;
>>>>> +    struct of_phandle_args iommu_spec = { .args_count = 1 };
>>>>>    -    if (of_parse_phandle_with_args(dev->of_node, "iommus",
>>>>> "#iommu-cells",
>>>>> -            0, &iommu_spec)) {
>>>>> -        dev_err(dev, "Cannot parse iommus property\n");
>>>>> -        return -ESRCH;
>>>>> +    if (dev_is_pci(dev)) {
>>>>> +        if (xen_dt_map_id(dev, &iommu_spec.np, iommu_spec.args)) {
>>>>> +            dev_err(dev, "Cannot translate ID\n");
>>>>> +            return -ESRCH;
>>>>> +        }
>>>>> +    } else {
>>>>> +        if (of_parse_phandle_with_args(dev->of_node, "iommus",
>>>>> "#iommu-cells",
>>>>> +                0, &iommu_spec)) {
>>>>> +            dev_err(dev, "Cannot parse iommus property\n");
>>>>> +            return -ESRCH;
>>>>> +        }
>>>>>        }
>>>>>          if (!of_device_is_compatible(iommu_spec.np,
>>>>> "xen,grant-dma") ||
>>>>> @@ -354,6 +413,7 @@ static int
>>>>> xen_dt_grant_init_backend_domid(struct device *dev,
>>>>>    void xen_grant_setup_dma_ops(struct device *dev)
>>>>>    {
>>>>>        struct xen_grant_dma_data *data;
>>>>> +    struct device_node *np;
>>>>>          data = find_xen_grant_dma_data(dev);
>>>>>        if (data) {
>>>>> @@ -365,8 +425,13 @@ void xen_grant_setup_dma_ops(struct device *dev)
>>>>>        if (!data)
>>>>>            goto err;
>>>>>    -    if (dev->of_node) {
>>>>> -        if (xen_dt_grant_init_backend_domid(dev, data))
>>>>> +    np = xen_dt_get_node(dev);
>>>>> +    if (np) {
>>>>> +        int ret;
>>>>> +
>>>>> +        ret = xen_dt_grant_init_backend_domid(dev, data);
>>>>> +        of_node_put(np);
>>>>> +        if (ret)
>>>>>                goto err;
>>>>>        } else if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT)) {
>>>>>            dev_info(dev, "Using dom0 as backend\n");
>>>>> --
>>>>> 2.25.1
>>>>>
>>>>
>>>
>
--
Regards,

Oleksandr Tyshchenko