Re: [PATCH V2] xen/virtio: Handle PCI devices which Host controller is described in DT
From: Oleksandr Tyshchenko
Date: Thu Oct 20 2022 - 16:08:19 EST
On 20.10.22 21:11, Xenia Ragiadakou wrote:
Hello Xenia
> On 10/20/22 17:12, Oleksandr Tyshchenko wrote:
>>
>> On 20.10.22 11:24, Xenia Ragiadakou wrote:
>>> On 10/19/22 22:41, Oleksandr Tyshchenko wrote:
>>>
>>> Hi Oleksandr
>>
>>
>> Hello Xenia
>>
>>
>>>
>>>>
>>>> On 19.10.22 11:47, Xenia Ragiadakou wrote:
>>>>
>>>> Hello Xenia
>>>>
>>>>> On 10/19/22 03:58, Stefano Stabellini wrote:
>>>>>> On Sat, 15 Oct 2022, Oleksandr Tyshchenko wrote:
>>>>>>> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@xxxxxxxx>
>>>>>>>
>>>>>>> Use the same "xen-grant-dma" device concept for the PCI devices
>>>>>>> behind device-tree based PCI Host controller, but with one
>>>>>>> modification.
>>>>>>> Unlike for platform devices, we cannot use generic IOMMU bindings
>>>>>>> (iommus property), as we need to support more flexible
>>>>>>> configuration.
>>>>>>> The problem is that PCI devices under the single PCI Host
>>>>>>> controller
>>>>>>> may have the backends running in different Xen domains and thus
>>>>>>> have
>>>>>>> different endpoints ID (backend domains ID).
>>>>>>>
>>>>>>> So use generic PCI-IOMMU bindings instead (iommu-map/iommu-map-mask
>>>>>>> properties) which allows us to describe relationship between PCI
>>>>>>> devices and backend domains ID properly.
>>>>>>>
>>>>>>> Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@xxxxxxxx>
>>>>>>
>>>>>> Now that I understood the approach and the reasons for it, I can
>>>>>> review
>>>>>> the patch :-)
>>>>>>
>>>>>> Please add an example of the bindings in the commit message.
>>>>>>
>>>>>>
>>>>>>> ---
>>>>>>> Slightly RFC. This is needed to support Xen grant mappings for
>>>>>>> virtio-pci devices
>>>>>>> on Arm at some point in the future. The Xen toolstack side is not
>>>>>>> completely ready yet.
>>>>>>> Here, for PCI devices we use more flexible way to pass backend
>>>>>>> domid
>>>>>>> to the guest
>>>>>>> than for platform devices.
>>>>>>>
>>>>>>> Changes V1 -> V2:
>>>>>>> - update commit description
>>>>>>> - rebase
>>>>>>> - rework to use generic PCI-IOMMU bindings instead of generic
>>>>>>> IOMMU bindings
>>>>>>>
>>>>>>> Previous discussion is at:
>>>>>>> https://urldefense.com/v3/__https://lore.kernel.org/xen-devel/20221006174804.2003029-1-olekstysh@xxxxxxxxx/__;!!GF_29dbcQIUBPA!3-vq7Edm3XfKtD5cnNjnOzDQvuo_XrhJ73yH-nPfqOkGGU0IjLG7R7MR_nAJCAPeOutHRLT44wKYwQwz3SauACie_ZAy$
>>>>>>>
>>>>>>>
>>>>>>> [lore[.]kernel[.]org]
>>>>>>>
>>>>>>> Based on:
>>>>>>> https://urldefense.com/v3/__https://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git/log/?h=for-linus-6.1__;!!GF_29dbcQIUBPA!3-vq7Edm3XfKtD5cnNjnOzDQvuo_XrhJ73yH-nPfqOkGGU0IjLG7R7MR_nAJCAPeOutHRLT44wKYwQwz3SauAEnMDHAq$
>>>>>>>
>>>>>>>
>>>>>>> [git[.]kernel[.]org]
>>>>>>> ---
>>>>>>> drivers/xen/grant-dma-ops.c | 87
>>>>>>> ++++++++++++++++++++++++++++++++-----
>>>>>>> 1 file changed, 76 insertions(+), 11 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/xen/grant-dma-ops.c
>>>>>>> b/drivers/xen/grant-dma-ops.c
>>>>>>> index daa525df7bdc..b79d9d6ce154 100644
>>>>>>> --- a/drivers/xen/grant-dma-ops.c
>>>>>>> +++ b/drivers/xen/grant-dma-ops.c
>>>>>>> @@ -10,6 +10,7 @@
>>>>>>> #include <linux/module.h>
>>>>>>> #include <linux/dma-map-ops.h>
>>>>>>> #include <linux/of.h>
>>>>>>> +#include <linux/pci.h>
>>>>>>> #include <linux/pfn.h>
>>>>>>> #include <linux/xarray.h>
>>>>>>> #include <linux/virtio_anchor.h>
>>>>>>> @@ -292,12 +293,55 @@ static const struct dma_map_ops
>>>>>>> xen_grant_dma_ops = {
>>>>>>> .dma_supported = xen_grant_dma_supported,
>>>>>>> };
>>>>>>> +static struct device_node *xen_dt_get_pci_host_node(struct
>>>>>>> device
>>>>>>> *dev)
>>>>>>> +{
>>>>>>> + struct pci_dev *pdev = to_pci_dev(dev);
>>>>>>> + struct pci_bus *bus = pdev->bus;
>>>>>>> +
>>>>>>> + /* Walk up to the root bus to look for PCI Host controller */
>>>>>>> + while (!pci_is_root_bus(bus))
>>>>>>> + bus = bus->parent;
>>>>>>> +
>>>>>>> + return of_node_get(bus->bridge->parent->of_node);
>>>>>>> +}
>>>>>>
>>>>>> It seems silly that we need to walk the hierachy that way, but I
>>>>>> couldn't find another way to do it
>>>>>>
>>>>>>
>>>>>>> +static struct device_node *xen_dt_get_node(struct device *dev)
>>>>>>> +{
>>>>>>> + if (dev_is_pci(dev))
>>>>>>> + return xen_dt_get_pci_host_node(dev);
>>>>>>> +
>>>>>>> + return of_node_get(dev->of_node);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int xen_dt_map_id(struct device *dev, struct device_node
>>>>>>> **iommu_np,
>>>>>>> + u32 *sid)
>>>>>>> +{
>>>>>>> + struct pci_dev *pdev = to_pci_dev(dev);
>>>>>>> + u32 rid = PCI_DEVID(pdev->bus->number, pdev->devfn);
>>>>>>> + struct device_node *host_np;
>>>>>>> + int ret;
>>>>>>> +
>>>>>>> + host_np = xen_dt_get_pci_host_node(dev);
>>>>>>> + if (!host_np)
>>>>>>> + return -ENODEV;
>>>>>>> +
>>>>>>> + ret = of_map_id(host_np, rid, "iommu-map", "iommu-map-mask",
>>>>>>> iommu_np, sid);
>>>>>>> + of_node_put(host_np);
>>>>>>> + return ret;
>>>>>>> +}
>>>>>>> +
>>>>>>> static bool xen_is_dt_grant_dma_device(struct device *dev)
>>>>>>> {
>>>>>>> - struct device_node *iommu_np;
>>>>>>> + struct device_node *iommu_np = NULL;
>>>>>>> bool has_iommu;
>>>>>>> - iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
>>>>>>> + if (dev_is_pci(dev)) {
>>>>>>> + if (xen_dt_map_id(dev, &iommu_np, NULL))
>>>>>>> + return false;
>>>>>>> + } else
>>>>>>> + iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
>>>>>>> +
>>>>>>> has_iommu = iommu_np &&
>>>>>>> of_device_is_compatible(iommu_np, "xen,grant-dma");
>>>>>>> of_node_put(iommu_np);
>>>>>>> @@ -307,9 +351,17 @@ static bool xen_is_dt_grant_dma_device(struct
>>>>>>> device *dev)
>>>>>>> bool xen_is_grant_dma_device(struct device *dev)
>>>>>>> {
>>>>>>> + struct device_node *np;
>>>>>>> +
>>>>>>> /* XXX Handle only DT devices for now */
>>>>>>> - if (dev->of_node)
>>>>>>> - return xen_is_dt_grant_dma_device(dev);
>>>>>>> + np = xen_dt_get_node(dev);
>>>>>>> + if (np) {
>>>>>>> + bool ret;
>>>>>>> +
>>>>>>> + ret = xen_is_dt_grant_dma_device(dev);
>>>>>>> + of_node_put(np);
>>>>>>> + return ret;
>>>>>>> + }
>>>>>>
>>>>>> We don't need to walk the PCI hierachy twice. Maybe we can add the
>>>>>> of_node check directly to xen_is_dt_grant_dma_device?
>>>>>>
>>>>>
>>>>> I think in general we could pass directly the host bridge device if
>>>>> dev_is_pci(dev) (which can be retrieved with
>>>>> pci_get_host_bridge_device(to_pci_dev(dev), and after done with it
>>>>> pci_put_host_bridge_device(phb)).
>>>>> So that, xen_is_dt_grant_dma_device() and
>>>>> xen_dt_grant_init_backend_domid() won't need to discover it
>>>>> themselves.
>>>>> This will simplify the code.
>>>>
>>>>
>>>> Good point. I have some remark. Can we use pci_find_host_bridge()
>>>> instead? This way we don't have to add #include "../pci/pci.h", and
>>>> have
>>>> to drop reference afterwards.
>>>>
>>>> With that xen_dt_get_pci_host_node() will became the following:
>>>>
>>>>
>>>> static struct device_node *xen_dt_get_pci_host_node(struct device
>>>> *dev)
>>>> {
>>>> struct pci_host_bridge *bridge =
>>>> pci_find_host_bridge(to_pci_dev(dev)->bus);
>>>>
>>>> return of_node_get(bridge->dev.parent->of_node);
>>>> }
>>>>
>>>
>>> You are right. I prefer your version instead of the above.
>>
>>
>> ok, thanks
>>
>>
>>>
>>>
>>>>
>>>> With Stefano's suggestion, we won't walk the PCI hierarchy twice when
>>>> executing xen_is_grant_dma_device() for PCI device:
>>>>
>>>> xen_is_grant_dma_device() -> xen_is_dt_grant_dma_device() ->
>>>> xen_dt_map_id() -> xen_dt_get_pci_host_node()
>>>>
>>>>
>>>> What do you think?
>>>>
>>>
>>> I was thinking passing the device_node along with the device in the
>>> function arguments. More specifically, of doing this (not tested, just
>>> an idea):
>>>
>>> bool xen_is_grant_dma_device(struct device *dev)
>>> {
>>> struct device_node *np;
>>> bool has_iommu = false;
>>>
>>> /* XXX Handle only DT devices for now */
>>> np = xen_dt_get_node(dev);
>>> if (np)
>>> has_iommu = xen_is_dt_grant_dma_device(dev, np);
>>> of_node_put(np);
>>> return has_iommu;
>>> }
>>>
>>> static bool xen_is_dt_grant_dma_device(struct device *dev,
>>> struct device_node *np)
>>> {
>>> struct device_node *iommu_np = NULL;
>>> bool has_iommu;
>>>
>>> if (dev_is_pci(dev)) {
>>> struct pci_dev *pdev = to_pci_dev(dev);
>>> u32 id = PCI_DEVID(pdev->bus->number, pdev->devfn);
>>> of_map_id(np, id, "iommu-map", "iommu-map-mask", &iommu_np,
>>> NULL);
>>> } else {
>>> iommu_np = of_parse_phandle(np, "iommus", 0);
>>> }
>>>
>>> has_iommu = iommu_np && of_device_is_compatible(iommu_np,
>>> "xen,grant-dma");
>>> of_node_put(iommu_np);
>>>
>>> return has_iommu;
>>> }
>>
>>
>> I got it.
>>
>> xen_is_grant_dma_device() for V3 won't call xen_dt_get_node(), but call
>> xen_is_dt_grant_dma_device() directly.
>>
>> static bool xen_is_dt_grant_dma_device(struct device *dev)
>> {
>> struct device_node *iommu_np = NULL;
>> bool has_iommu;
>>
>> if (dev_is_pci(dev)) {
>> if (xen_dt_map_id(dev, &iommu_np, NULL))
>> return false;
>> } else if (dev->of_node)
>> iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
>> else
>> return false;
>>
>> has_iommu = iommu_np &&
>> of_device_is_compatible(iommu_np, "xen,grant-dma");
>> of_node_put(iommu_np);
>>
>> return has_iommu;
>> }
>>
>> bool xen_is_grant_dma_device(struct device *dev)
>> {
>> /* XXX Handle only DT devices for now */
>> return xen_is_dt_grant_dma_device(dev);
>> }
>>
>>
>
> Ok. One difference, that I see from the previous, is that here you
> don't use the dynamic interface when you access the dev->of_node
> (of_node_get/of_node_put). Before, this was guarded through the
> external xen_dt_get_node().
>
> I suspect that the same needs to be done for the function
> xen_grant_setup_dma_ops(). There, also, the code walks up to the root
> bus twice.
Hmm, xen_dt_grant_init_backend_domid() should only be called if we deal
with device-tree based device.
I think you are completely right, thanks!
In order to address both your comments, I think I need to rework the
code (taking into the account your example with xen_is_dt_grant_dma_device()
provided a few letters ago and extrapolate this example to
xen_dt_grant_init_backend_domid()). Below the patch (not tested) which
seems to address both your comments (also I dropped
xen_dt_map_id() and squashed xen_dt_get_pci_host_node() with
xen_dt_get_node()).
diff --git a/drivers/xen/grant-dma-ops.c b/drivers/xen/grant-dma-ops.c
index daa525df7bdc..dae24dbd2ef7 100644
--- a/drivers/xen/grant-dma-ops.c
+++ b/drivers/xen/grant-dma-ops.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/dma-map-ops.h>
#include <linux/of.h>
+#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/xarray.h>
#include <linux/virtio_anchor.h>
@@ -292,12 +293,33 @@ static const struct dma_map_ops xen_grant_dma_ops = {
.dma_supported = xen_grant_dma_supported,
};
-static bool xen_is_dt_grant_dma_device(struct device *dev)
+static struct device_node *xen_dt_get_node(struct device *dev)
{
- struct device_node *iommu_np;
+ if (dev_is_pci(dev)) {
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct pci_host_bridge *bridge =
pci_find_host_bridge(pdev->bus);
+
+ return of_node_get(bridge->dev.parent->of_node);
+ }
+
+ return of_node_get(dev->of_node);
+}
+
+static bool xen_is_dt_grant_dma_device(struct device *dev,
+ struct device_node *np)
+{
+ struct device_node *iommu_np = NULL;
bool has_iommu;
- iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
+ if (dev_is_pci(dev)) {
+ struct pci_dev *pdev = to_pci_dev(dev);
+ u32 rid = PCI_DEVID(pdev->bus->number, pdev->devfn);
+
+ if (of_map_id(np, rid, "iommu-map", "iommu-map-mask",
&iommu_np, NULL))
+ return false;
+ } else
+ iommu_np = of_parse_phandle(np, "iommus", 0);
+
has_iommu = iommu_np &&
of_device_is_compatible(iommu_np, "xen,grant-dma");
of_node_put(iommu_np);
@@ -307,9 +329,17 @@ static bool xen_is_dt_grant_dma_device(struct
device *dev)
bool xen_is_grant_dma_device(struct device *dev)
{
+ struct device_node *np;
+
/* XXX Handle only DT devices for now */
- if (dev->of_node)
- return xen_is_dt_grant_dma_device(dev);
+ np = xen_dt_get_node(dev);
+ if (np) {
+ bool ret;
+
+ ret = xen_is_dt_grant_dma_device(dev, np);
+ of_node_put(np);
+ return ret;
+ }
return false;
}
@@ -323,14 +353,26 @@ bool xen_virtio_mem_acc(struct virtio_device *dev)
}
static int xen_dt_grant_init_backend_domid(struct device *dev,
+ struct device_node *np,
struct xen_grant_dma_data *data)
{
- struct of_phandle_args iommu_spec;
+ struct of_phandle_args iommu_spec = { .args_count = 1 };
- if (of_parse_phandle_with_args(dev->of_node, "iommus",
"#iommu-cells",
- 0, &iommu_spec)) {
- dev_err(dev, "Cannot parse iommus property\n");
- return -ESRCH;
+ if (dev_is_pci(dev)) {
+ struct pci_dev *pdev = to_pci_dev(dev);
+ u32 rid = PCI_DEVID(pdev->bus->number, pdev->devfn);
+
+ if (of_map_id(np, rid, "iommu-map", "iommu-map-mask",
&iommu_spec.np,
+ iommu_spec.args)) {
+ dev_err(dev, "Cannot translate ID\n");
+ return -ESRCH;
+ }
+ } else {
+ if (of_parse_phandle_with_args(np, "iommus", "#iommu-cells",
+ 0, &iommu_spec)) {
+ dev_err(dev, "Cannot parse iommus property\n");
+ return -ESRCH;
+ }
}
if (!of_device_is_compatible(iommu_spec.np, "xen,grant-dma") ||
@@ -354,6 +396,7 @@ static int xen_dt_grant_init_backend_domid(struct
device *dev,
void xen_grant_setup_dma_ops(struct device *dev)
{
struct xen_grant_dma_data *data;
+ struct device_node *np;
data = find_xen_grant_dma_data(dev);
if (data) {
@@ -365,8 +408,13 @@ void xen_grant_setup_dma_ops(struct device *dev)
if (!data)
goto err;
- if (dev->of_node) {
- if (xen_dt_grant_init_backend_domid(dev, data))
+ np = xen_dt_get_node(dev);
+ if (np) {
+ int ret;
+
+ ret = xen_dt_grant_init_backend_domid(dev, np, data);
+ of_node_put(np);
+ if (ret)
goto err;
} else if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT)) {
dev_info(dev, "Using dom0 as backend\n");
Does it look ok now?
>
>
>>>
>>> I 'm wondering ... is it possible for the host bridge device node to
>>> have the iommus property set? meaning that all of its pci devs will
>>> have the same backend?
>>
>> Good question. I think, it is possible... This is technically what V1 is
>> doing.
>>
>>
>> Are you asking because to support "iommus" for PCI devices as well to
>> describe that use-case with all PCI devices having the same endpoint ID
>> (backend ID)?
>> If yes, I think, this could be still described by "iommu-map" property,
>> something like that (if we don't want to describe mapping for each PCI
>> device one-by-one).
>>
>> iommu-map = <0x0 &iommu X 0x1>;
>>
>> iommu-map-mask = <0x0>;
>>
>> where the X is backend ID.
>>
>>
>> It feels to me that it should be written down somewhere that for
>> platform devices we expect "iommus" and for PCI devices we expect
>> "iommu-map/iommu-map-mask" to be present.
>
> Thanks for the clarification, now I got it. Yes I agree.
ok, good
>
>>>
>>>
>>>>>
>>>>>>
>>>>>>> return false;
>>>>>>> }
>>>>>>> @@ -325,12 +377,19 @@ bool xen_virtio_mem_acc(struct virtio_device
>>>>>>> *dev)
>>>>>>> static int xen_dt_grant_init_backend_domid(struct device *dev,
>>>>>>> struct xen_grant_dma_data *data)
>>>>>>> {
>>>>>>> - struct of_phandle_args iommu_spec;
>>>>>>> + struct of_phandle_args iommu_spec = { .args_count = 1 };
>>>>>>> - if (of_parse_phandle_with_args(dev->of_node, "iommus",
>>>>>>> "#iommu-cells",
>>>>>>> - 0, &iommu_spec)) {
>>>>>>> - dev_err(dev, "Cannot parse iommus property\n");
>>>>>>> - return -ESRCH;
>>>>>>> + if (dev_is_pci(dev)) {
>>>>>>> + if (xen_dt_map_id(dev, &iommu_spec.np, iommu_spec.args)) {
>>>>>>> + dev_err(dev, "Cannot translate ID\n");
>>>>>>> + return -ESRCH;
>>>>>>> + }
>>>>>>> + } else {
>>>>>>> + if (of_parse_phandle_with_args(dev->of_node, "iommus",
>>>>>>> "#iommu-cells",
>>>>>>> + 0, &iommu_spec)) {
>>>>>>> + dev_err(dev, "Cannot parse iommus property\n");
>>>>>>> + return -ESRCH;
>>>>>>> + }
>>>>>>> }
>>>>>>> if (!of_device_is_compatible(iommu_spec.np,
>>>>>>> "xen,grant-dma") ||
>>>>>>> @@ -354,6 +413,7 @@ static int
>>>>>>> xen_dt_grant_init_backend_domid(struct device *dev,
>>>>>>> void xen_grant_setup_dma_ops(struct device *dev)
>>>>>>> {
>>>>>>> struct xen_grant_dma_data *data;
>>>>>>> + struct device_node *np;
>>>>>>> data = find_xen_grant_dma_data(dev);
>>>>>>> if (data) {
>>>>>>> @@ -365,8 +425,13 @@ void xen_grant_setup_dma_ops(struct device
>>>>>>> *dev)
>>>>>>> if (!data)
>>>>>>> goto err;
>>>>>>> - if (dev->of_node) {
>>>>>>> - if (xen_dt_grant_init_backend_domid(dev, data))
>>>>>>> + np = xen_dt_get_node(dev);
>>>>>>> + if (np) {
>>>>>>> + int ret;
>>>>>>> +
>>>>>>> + ret = xen_dt_grant_init_backend_domid(dev, data);
>>>>>>> + of_node_put(np);
>>>>>>> + if (ret)
>>>>>>> goto err;
>>>>>>> } else if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT)) {
>>>>>>> dev_info(dev, "Using dom0 as backend\n");
>>>>>>> --
>>>>>>> 2.25.1
>>>>>>>
>>>>>>
>>>>>
>>>
>
--
Regards,
Oleksandr Tyshchenko