+ u64 sw_bypass_cpu_start;
+ u64 sw_bypass_dma_start;
+ u64 sw_bypass_len;
+
+ struct list_head streams;
I'm staring to think this could just be a bitmap, in a u16 even.
The problem is that these streams may come from two different
DART instances. That is required for e.g. the dwc3 controller which
has a weird quirk where DMA transactions go through two separate
DARTs with no clear pattern (e.g. some xhci control structures use the
first dart while other structures use the second one).
Both of them need to point to the same pagetable.
In the device tree the node will have an entry like this:
dwc3_0: usb@382280000{
...
iommus = <&dwc3_0_dart_0 0>, <&dwc3_0_dart_1 1>;
};
There's no need for a linked list though once I do this properly with
groups. I can just use an array allocated when the first device is
attached, which just contains apple_dart* and streamid values.
+
+ spinlock_t lock;
+
+ struct iommu_domain domain;
+};
+
+/*
+ * This structure is attached to devices with dev_iommu_priv_set() on of_xlate
+ * and contains a list of streams bound to this device as defined in the
+ * device tree. Multiple DART instances can be attached to a single device
+ * and each stream is identified by its stream id.
+ * It's usually reference by a pointer called *cfg.
+ *
+ * A dynamic array instead of a linked list is used here since in almost
+ * all cases a device will just be attached to a single stream and streams
+ * are never removed after they have been added.
+ *
+ * @num_streams: number of streams attached
+ * @streams: array of structs to identify attached streams and the device link
+ * to the iommu
+ */
+struct apple_dart_master_cfg {
+ int num_streams;
+ struct {
+ struct apple_dart *dart;
+ u32 sid;
Can't you use the fwspec for this?
I'd be happy to use the fwspec code if that's somehow possible.
I'm not sure how though since I need to store both the reference to the DART
_and_ to the stream id. As far as I can tell the fwspec code would only allow
to store the stream ids.
(see also the previous comment regarding the dwc3 node which requires stream
ids from two separate DART instances)
+ struct device_link *link;
Is it necessary to use stateless links, or could you use
DL_FLAG_AUTOREMOVE_SUPPLIER and not have to keep track of them manually?
I'll just use DL_FLAG_AUTOREMOVE_SUPPLIER. No idea why I went for stateless links.
[...]
+ /* restore stream identity map */
+ writel(0x03020100, dart->regs + DART_STREAM_REMAP);
+ writel(0x07060504, dart->regs + DART_STREAM_REMAP + 4);
+ writel(0x0b0a0908, dart->regs + DART_STREAM_REMAP + 8);
+ writel(0x0f0e0d0c, dart->regs + DART_STREAM_REMAP + 12);
Any hint of what the magic numbers mean?
Yes, it's just 0,1,2,3...,0xe,0xf but I can't do 8bit writes to the bus
and 32 bit writes then require these slightly awkward "swapped" numbers.
I'll add a comment since it's not obvious at first glance.
+ /*
+ * we can't mix and match DARTs that support bypass mode with those who don't
+ * because the iova space in fake bypass mode generally has an offset
+ */
Erm, something doesn't sound right there... IOMMU_DOMAIN_IDENTITY should
be exactly what it says, regardless of how it's implemented. If you
can't provide a true identity mapping then you're probably better off
not pretending to support them in the first place.
Some background: the PCIe DART only supports a 32bit VA space but RAM
on these machines starts at 0x8_0000_0000. I have something like
dma-ranges = <0x42000000 0 0 0x8 0 0 0xffff0000>;
in the pcie nodes to add that offset to dma addresses.
What I want to do here then is to setup an identity mapping with respect
to the DMA layer understanding of addresses encoded in bus_dma_region.
Now this will always just be a constant offset of 0x8_0000_0000 for
all M1s but I didn't want to hardcode that.
The code here is just there to guard against a situation where someone
somehow manages to attach two devices with different offsets to the same
domain.
If that's not how the abstraction is supposed to work and/or too big of a hack
I'll just remove the software bypass mode altogether.
PCIe won't work on 4k kernels then but the only people using this so far
build their own kernels with patches either way and won't complain.
And by the time Linux will actually be useful for "normal" setups
the dma-iommu layer can hopefully just handle a larger page granularity.
+ if (WARN_ON(domain->type == IOMMU_DOMAIN_IDENTITY &&
+ (domain->dart->supports_bypass != dart->supports_bypass)))
+ return -EINVAL;
+
+ list_for_each_entry(stream, &domain->streams, stream_head) {
+ if (stream->dart == dart && stream->sid == sid) {
+ stream->num_devices++;
+ return 0;
+ }
+ }
+
+ spin_lock_irqsave(&dart->lock, flags);
+
+ if (WARN_ON(dart->used_sids & BIT(sid))) {
+ ret = -EINVAL;
+ goto error;
+ }
+
+ stream = kzalloc(sizeof(*stream), GFP_ATOMIC);
+ if (!stream) {
+ ret = -ENOMEM;
+ goto error;
+ }
Couldn't you do this outside the lock? (If, calling back to other
comments, it can't get refactored out of existence anyway)
Probably, but I'll first see if I can just refactor it away.
+static struct iommu_domain *apple_dart_domain_alloc(unsigned int type)
+{
+ struct apple_dart_domain *dart_domain;
+
+ if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED &&
+ type != IOMMU_DOMAIN_IDENTITY && type != IOMMU_DOMAIN_BLOCKED)
+ return NULL;
I want to say there's not much point in that, but then I realise I've
spent the last couple of days writing patches to add a new domain type :)
Hah! Just because I'm curious: What is that new domain type going to be? :)
+static int apple_dart_of_xlate(struct device *dev, struct of_phandle_args *args)
+{
+ struct platform_device *iommu_pdev = of_find_device_by_node(args->np);
+ struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+ unsigned int num_streams = cfg ? cfg->num_streams : 0;
+ struct apple_dart_master_cfg *cfg_new;
+ struct apple_dart *dart = platform_get_drvdata(iommu_pdev);
+
+ if (args->args_count != 1)
+ return -EINVAL;
+
+ cfg_new = krealloc(cfg, struct_size(cfg, streams, num_streams + 1),
+ GFP_KERNEL);
+ if (!cfg_new)
+ return -ENOMEM;
+
+ cfg = cfg_new;
+ dev_iommu_priv_set(dev, cfg);
+
+ cfg->num_streams = num_streams;
+ cfg->streams[cfg->num_streams].dart = dart;
+ cfg->streams[cfg->num_streams].sid = args->args[0];
+ cfg->num_streams++;
Yeah, this is way too reminiscent of the fwspec code for comfort. Even
if you can't use autoremove links for some reason, an array of 16
device_link pointers hung off apple_dart still wins over these little
pointer-heavy structures if you need more than a few of them.
I can get rid of the links, but I'll still need some way to store
both the apple_dart and the sid here. Like mentioned above, I'll
be happy to reuse the fwspec code but I don't see how yet.
+static int apple_dart_remove(struct platform_device *pdev)
+{
+ struct apple_dart *dart = platform_get_drvdata(pdev);
+
+ devm_free_irq(dart->dev, dart->irq, dart);
+
+ iommu_device_unregister(&dart->iommu);
+ iommu_device_sysfs_remove(&dart->iommu);
+
+ clk_bulk_disable(dart->num_clks, dart->clks);
+ clk_bulk_unprepare(dart->num_clks, dart->clks);
Ditto.
And again the bus ops are still installed - that'll get really fun if
this is a module unload...
Ugh, yeah. I'll fix that as well. I'll have to see how to make this work
correctly with multiple DART instances. I guess I should only remove the
bus ops once the last one is removed. Now that I think about it, this
could also get tricky in the cleanup paths of apple_dart_probe.
Maybe just add a module_init that sets up the bus ops when it finds at
least one DART node and module_exit to tear them down again?