Re: [PATCH V2] xen/virtio: Handle PCI devices which Host controller is described in DT
From: Oleksandr Tyshchenko
Date: Thu Oct 20 2022 - 10:12:58 EST
On 20.10.22 11:24, Xenia Ragiadakou wrote:
> On 10/19/22 22:41, Oleksandr Tyshchenko wrote:
>
> Hi Oleksandr
Hello Xenia
>
>>
>> On 19.10.22 11:47, Xenia Ragiadakou wrote:
>>
>> Hello Xenia
>>
>>> On 10/19/22 03:58, Stefano Stabellini wrote:
>>>> On Sat, 15 Oct 2022, Oleksandr Tyshchenko wrote:
>>>>> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@xxxxxxxx>
>>>>>
>>>>> Use the same "xen-grant-dma" device concept for the PCI devices
>>>>> behind device-tree based PCI Host controller, but with one
>>>>> modification.
>>>>> Unlike for platform devices, we cannot use generic IOMMU bindings
>>>>> (iommus property), as we need to support more flexible configuration.
>>>>> The problem is that PCI devices under the single PCI Host controller
>>>>> may have the backends running in different Xen domains and thus have
>>>>> different endpoints ID (backend domains ID).
>>>>>
>>>>> So use generic PCI-IOMMU bindings instead (iommu-map/iommu-map-mask
>>>>> properties) which allows us to describe relationship between PCI
>>>>> devices and backend domains ID properly.
>>>>>
>>>>> Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@xxxxxxxx>
>>>>
>>>> Now that I understood the approach and the reasons for it, I can
>>>> review
>>>> the patch :-)
>>>>
>>>> Please add an example of the bindings in the commit message.
>>>>
>>>>
>>>>> ---
>>>>> Slightly RFC. This is needed to support Xen grant mappings for
>>>>> virtio-pci devices
>>>>> on Arm at some point in the future. The Xen toolstack side is not
>>>>> completely ready yet.
>>>>> Here, for PCI devices we use more flexible way to pass backend domid
>>>>> to the guest
>>>>> than for platform devices.
>>>>>
>>>>> Changes V1 -> V2:
>>>>> - update commit description
>>>>> - rebase
>>>>> - rework to use generic PCI-IOMMU bindings instead of generic
>>>>> IOMMU bindings
>>>>>
>>>>> Previous discussion is at:
>>>>> https://urldefense.com/v3/__https://lore.kernel.org/xen-devel/20221006174804.2003029-1-olekstysh@xxxxxxxxx/__;!!GF_29dbcQIUBPA!3-vq7Edm3XfKtD5cnNjnOzDQvuo_XrhJ73yH-nPfqOkGGU0IjLG7R7MR_nAJCAPeOutHRLT44wKYwQwz3SauACie_ZAy$
>>>>>
>>>>> [lore[.]kernel[.]org]
>>>>>
>>>>> Based on:
>>>>> https://urldefense.com/v3/__https://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git/log/?h=for-linus-6.1__;!!GF_29dbcQIUBPA!3-vq7Edm3XfKtD5cnNjnOzDQvuo_XrhJ73yH-nPfqOkGGU0IjLG7R7MR_nAJCAPeOutHRLT44wKYwQwz3SauAEnMDHAq$
>>>>>
>>>>> [git[.]kernel[.]org]
>>>>> ---
>>>>> drivers/xen/grant-dma-ops.c | 87
>>>>> ++++++++++++++++++++++++++++++++-----
>>>>> 1 file changed, 76 insertions(+), 11 deletions(-)
>>>>>
>>>>> diff --git a/drivers/xen/grant-dma-ops.c
>>>>> b/drivers/xen/grant-dma-ops.c
>>>>> index daa525df7bdc..b79d9d6ce154 100644
>>>>> --- a/drivers/xen/grant-dma-ops.c
>>>>> +++ b/drivers/xen/grant-dma-ops.c
>>>>> @@ -10,6 +10,7 @@
>>>>> #include <linux/module.h>
>>>>> #include <linux/dma-map-ops.h>
>>>>> #include <linux/of.h>
>>>>> +#include <linux/pci.h>
>>>>> #include <linux/pfn.h>
>>>>> #include <linux/xarray.h>
>>>>> #include <linux/virtio_anchor.h>
>>>>> @@ -292,12 +293,55 @@ static const struct dma_map_ops
>>>>> xen_grant_dma_ops = {
>>>>> .dma_supported = xen_grant_dma_supported,
>>>>> };
>>>>> +static struct device_node *xen_dt_get_pci_host_node(struct device
>>>>> *dev)
>>>>> +{
>>>>> + struct pci_dev *pdev = to_pci_dev(dev);
>>>>> + struct pci_bus *bus = pdev->bus;
>>>>> +
>>>>> + /* Walk up to the root bus to look for PCI Host controller */
>>>>> + while (!pci_is_root_bus(bus))
>>>>> + bus = bus->parent;
>>>>> +
>>>>> + return of_node_get(bus->bridge->parent->of_node);
>>>>> +}
>>>>
>>>> It seems silly that we need to walk the hierachy that way, but I
>>>> couldn't find another way to do it
>>>>
>>>>
>>>>> +static struct device_node *xen_dt_get_node(struct device *dev)
>>>>> +{
>>>>> + if (dev_is_pci(dev))
>>>>> + return xen_dt_get_pci_host_node(dev);
>>>>> +
>>>>> + return of_node_get(dev->of_node);
>>>>> +}
>>>>> +
>>>>> +static int xen_dt_map_id(struct device *dev, struct device_node
>>>>> **iommu_np,
>>>>> + u32 *sid)
>>>>> +{
>>>>> + struct pci_dev *pdev = to_pci_dev(dev);
>>>>> + u32 rid = PCI_DEVID(pdev->bus->number, pdev->devfn);
>>>>> + struct device_node *host_np;
>>>>> + int ret;
>>>>> +
>>>>> + host_np = xen_dt_get_pci_host_node(dev);
>>>>> + if (!host_np)
>>>>> + return -ENODEV;
>>>>> +
>>>>> + ret = of_map_id(host_np, rid, "iommu-map", "iommu-map-mask",
>>>>> iommu_np, sid);
>>>>> + of_node_put(host_np);
>>>>> + return ret;
>>>>> +}
>>>>> +
>>>>> static bool xen_is_dt_grant_dma_device(struct device *dev)
>>>>> {
>>>>> - struct device_node *iommu_np;
>>>>> + struct device_node *iommu_np = NULL;
>>>>> bool has_iommu;
>>>>> - iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
>>>>> + if (dev_is_pci(dev)) {
>>>>> + if (xen_dt_map_id(dev, &iommu_np, NULL))
>>>>> + return false;
>>>>> + } else
>>>>> + iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
>>>>> +
>>>>> has_iommu = iommu_np &&
>>>>> of_device_is_compatible(iommu_np, "xen,grant-dma");
>>>>> of_node_put(iommu_np);
>>>>> @@ -307,9 +351,17 @@ static bool xen_is_dt_grant_dma_device(struct
>>>>> device *dev)
>>>>> bool xen_is_grant_dma_device(struct device *dev)
>>>>> {
>>>>> + struct device_node *np;
>>>>> +
>>>>> /* XXX Handle only DT devices for now */
>>>>> - if (dev->of_node)
>>>>> - return xen_is_dt_grant_dma_device(dev);
>>>>> + np = xen_dt_get_node(dev);
>>>>> + if (np) {
>>>>> + bool ret;
>>>>> +
>>>>> + ret = xen_is_dt_grant_dma_device(dev);
>>>>> + of_node_put(np);
>>>>> + return ret;
>>>>> + }
>>>>
>>>> We don't need to walk the PCI hierachy twice. Maybe we can add the
>>>> of_node check directly to xen_is_dt_grant_dma_device?
>>>>
>>>
>>> I think in general we could pass directly the host bridge device if
>>> dev_is_pci(dev) (which can be retrieved with
>>> pci_get_host_bridge_device(to_pci_dev(dev), and after done with it
>>> pci_put_host_bridge_device(phb)).
>>> So that, xen_is_dt_grant_dma_device() and
>>> xen_dt_grant_init_backend_domid() won't need to discover it themselves.
>>> This will simplify the code.
>>
>>
>> Good point. I have some remark. Can we use pci_find_host_bridge()
>> instead? This way we don't have to add #include "../pci/pci.h", and have
>> to drop reference afterwards.
>>
>> With that xen_dt_get_pci_host_node() will became the following:
>>
>>
>> static struct device_node *xen_dt_get_pci_host_node(struct device *dev)
>> {
>> struct pci_host_bridge *bridge =
>> pci_find_host_bridge(to_pci_dev(dev)->bus);
>>
>> return of_node_get(bridge->dev.parent->of_node);
>> }
>>
>
> You are right. I prefer your version instead of the above.
ok, thanks
>
>
>>
>> With Stefano's suggestion, we won't walk the PCI hierarchy twice when
>> executing xen_is_grant_dma_device() for PCI device:
>>
>> xen_is_grant_dma_device() -> xen_is_dt_grant_dma_device() ->
>> xen_dt_map_id() -> xen_dt_get_pci_host_node()
>>
>>
>> What do you think?
>>
>
> I was thinking passing the device_node along with the device in the
> function arguments. More specifically, of doing this (not tested, just
> an idea):
>
> bool xen_is_grant_dma_device(struct device *dev)
> {
> struct device_node *np;
> bool has_iommu = false;
>
> /* XXX Handle only DT devices for now */
> np = xen_dt_get_node(dev);
> if (np)
> has_iommu = xen_is_dt_grant_dma_device(dev, np);
> of_node_put(np);
> return has_iommu;
> }
>
> static bool xen_is_dt_grant_dma_device(struct device *dev,
> struct device_node *np)
> {
> struct device_node *iommu_np = NULL;
> bool has_iommu;
>
> if (dev_is_pci(dev)) {
> struct pci_dev *pdev = to_pci_dev(dev);
> u32 id = PCI_DEVID(pdev->bus->number, pdev->devfn);
> of_map_id(np, id, "iommu-map", "iommu-map-mask", &iommu_np,
> NULL);
> } else {
> iommu_np = of_parse_phandle(np, "iommus", 0);
> }
>
> has_iommu = iommu_np && of_device_is_compatible(iommu_np,
> "xen,grant-dma");
> of_node_put(iommu_np);
>
> return has_iommu;
> }
I got it.
xen_is_grant_dma_device() for V3 won't call xen_dt_get_node(), but call
xen_is_dt_grant_dma_device() directly.
static bool xen_is_dt_grant_dma_device(struct device *dev)
{
struct device_node *iommu_np = NULL;
bool has_iommu;
if (dev_is_pci(dev)) {
if (xen_dt_map_id(dev, &iommu_np, NULL))
return false;
} else if (dev->of_node)
iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
else
return false;
has_iommu = iommu_np &&
of_device_is_compatible(iommu_np, "xen,grant-dma");
of_node_put(iommu_np);
return has_iommu;
}
bool xen_is_grant_dma_device(struct device *dev)
{
/* XXX Handle only DT devices for now */
return xen_is_dt_grant_dma_device(dev);
}
>
> I 'm wondering ... is it possible for the host bridge device node to
> have the iommus property set? meaning that all of its pci devs will
> have the same backend?
Good question. I think, it is possible... This is technically what V1 is
doing.
Are you asking because to support "iommus" for PCI devices as well to
describe that use-case with all PCI devices having the same endpoint ID
(backend ID)?
If yes, I think, this could be still described by "iommu-map" property,
something like that (if we don't want to describe mapping for each PCI
device one-by-one).
iommu-map = <0x0 &iommu X 0x1>;
iommu-map-mask = <0x0>;
where the X is backend ID.
It feels to me that it should be written down somewhere that for
platform devices we expect "iommus" and for PCI devices we expect
"iommu-map/iommu-map-mask" to be present.
>
>
>>>
>>>>
>>>>> return false;
>>>>> }
>>>>> @@ -325,12 +377,19 @@ bool xen_virtio_mem_acc(struct virtio_device
>>>>> *dev)
>>>>> static int xen_dt_grant_init_backend_domid(struct device *dev,
>>>>> struct xen_grant_dma_data *data)
>>>>> {
>>>>> - struct of_phandle_args iommu_spec;
>>>>> + struct of_phandle_args iommu_spec = { .args_count = 1 };
>>>>> - if (of_parse_phandle_with_args(dev->of_node, "iommus",
>>>>> "#iommu-cells",
>>>>> - 0, &iommu_spec)) {
>>>>> - dev_err(dev, "Cannot parse iommus property\n");
>>>>> - return -ESRCH;
>>>>> + if (dev_is_pci(dev)) {
>>>>> + if (xen_dt_map_id(dev, &iommu_spec.np, iommu_spec.args)) {
>>>>> + dev_err(dev, "Cannot translate ID\n");
>>>>> + return -ESRCH;
>>>>> + }
>>>>> + } else {
>>>>> + if (of_parse_phandle_with_args(dev->of_node, "iommus",
>>>>> "#iommu-cells",
>>>>> + 0, &iommu_spec)) {
>>>>> + dev_err(dev, "Cannot parse iommus property\n");
>>>>> + return -ESRCH;
>>>>> + }
>>>>> }
>>>>> if (!of_device_is_compatible(iommu_spec.np,
>>>>> "xen,grant-dma") ||
>>>>> @@ -354,6 +413,7 @@ static int
>>>>> xen_dt_grant_init_backend_domid(struct device *dev,
>>>>> void xen_grant_setup_dma_ops(struct device *dev)
>>>>> {
>>>>> struct xen_grant_dma_data *data;
>>>>> + struct device_node *np;
>>>>> data = find_xen_grant_dma_data(dev);
>>>>> if (data) {
>>>>> @@ -365,8 +425,13 @@ void xen_grant_setup_dma_ops(struct device *dev)
>>>>> if (!data)
>>>>> goto err;
>>>>> - if (dev->of_node) {
>>>>> - if (xen_dt_grant_init_backend_domid(dev, data))
>>>>> + np = xen_dt_get_node(dev);
>>>>> + if (np) {
>>>>> + int ret;
>>>>> +
>>>>> + ret = xen_dt_grant_init_backend_domid(dev, data);
>>>>> + of_node_put(np);
>>>>> + if (ret)
>>>>> goto err;
>>>>> } else if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT)) {
>>>>> dev_info(dev, "Using dom0 as backend\n");
>>>>> --
>>>>> 2.25.1
>>>>>
>>>>
>>>
>
--
Regards,
Oleksandr Tyshchenko