On Thu, Aug 18, 2022 at 4:21 AM Christian König
<christian.koenig@xxxxxxx> wrote:
Am 17.08.22 um 15:44 schrieb Rob Clark:This is true on arm64 as well, but only in the S1 tables (which I
On Wed, Aug 17, 2022 at 2:57 AM Christian KönigThe channel for the address is not mmap(), but rather the page faults.
<christian.koenig@xxxxxxx> wrote:
[SNIP]How would that work.. mmap() is the channel for the address, we'd need
The resulting cache attrs from combination of S1 and S2 translation
can differ. So ideally we setup the S2 pgtables in guest aligned with
host userspace mappings
Well exactly that is not very convincing.
What you want to do is to use one channel for the address and a
different one for the cache attrs, that's not something I would
recommend doing in general.
to introduce a new syscall that returned additional information?
mmap() is just the function setting up that channel.
The page faults then insert both the address as well as the caching
attributes (at least on x86).
would have to assume is the case on x86 as well)
That we then need to forward the caching attributes manually once moreOn arm64/aarch64, they *can*.. but the system (on some versions of
seems really misplaced.
Well you mentioned that the client page tables can be setup in a wayInstead the client pgtables should be setup in a way so that host canHow? That is completely not how VMs work. Even if the host knew
overwrite them.
where the pgtables were and somehow magically knew the various guest
userspace VAs, it would be racey.
that the host page tables determine what caching to use. As far as I can
see this is what we should use here.
armv8) can also be configured to let S2 determine the attributes. And
apparently there are benefits to this (avoids unnecessary cache
flushing in the host, AFAIU.) This is the case where we need this new
api.
IMO it is fine for the exporter to return a value indicating that the
attributes change dynamically or that S1 attributes must somehow be
used by the hw. This would at least let the VMM return an error in
cases where S1 attrs cannot be relied on. But there are enough
exporters where the cache attrs are static for the life of the buffer.
So even if you need to return DMA_BUF_MAP_I_DONT_KNOW, maybe that is
fine (if x86 can always rely on S1 attrs), or at least will let the
VMM return an error rather than just blindly assuming things will
work.
But it makes no sense to reject the whole idea just because of some
exporters (which may not even need this). There is always room to let
them return a map-info value that describes the situation or
limitations to the VMM.
BR,
-R
Regards,
Christian.
BR,
-R
Regards,
Christian.
BR,
-R
Regards,
Christian.
BR,
-R
If the hardware can't use the caching information from the host CPU page
tables directly then that pretty much completely breaks the concept that
the exporter is responsible for setting up those page tables.
Regards,
Christian.
drivers/dma-buf/dma-buf.c | 63 +++++++++++++++++++++++++++------
include/linux/dma-buf.h | 11 ++++++
include/uapi/linux/dma-buf.h | 68 ++++++++++++++++++++++++++++++++++++
3 files changed, 132 insertions(+), 10 deletions(-)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 32f55640890c..262c4706f721 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -125,6 +125,32 @@ static struct file_system_type dma_buf_fs_type = {
.kill_sb = kill_anon_super,
};
+static int __dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+ int ret;
+
+ /* check if buffer supports mmap */
+ if (!dmabuf->ops->mmap)
+ return -EINVAL;
+
+ ret = dmabuf->ops->mmap(dmabuf, vma);
+
+ /*
+ * If the exporter claims to support coherent access, ensure the
+ * pgprot flags match the claim.
+ */
+ if ((dmabuf->map_info != DMA_BUF_MAP_INCOHERENT) && !ret) {
+ pgprot_t wc_prot = pgprot_writecombine(vma->vm_page_prot);
+ if (dmabuf->map_info == DMA_BUF_COHERENT_WC) {
+ WARN_ON_ONCE(pgprot_val(vma->vm_page_prot) != pgprot_val(wc_prot));
+ } else {
+ WARN_ON_ONCE(pgprot_val(vma->vm_page_prot) == pgprot_val(wc_prot));
+ }
+ }
+
+ return ret;
+}
+
static int dma_buf_mmap_internal(struct file *file, struct vm_area_struct *vma)
{
struct dma_buf *dmabuf;
@@ -134,16 +160,12 @@ static int dma_buf_mmap_internal(struct file *file, struct vm_area_struct *vma)
dmabuf = file->private_data;
- /* check if buffer supports mmap */
- if (!dmabuf->ops->mmap)
- return -EINVAL;
-
/* check for overflowing the buffer's size */
if (vma->vm_pgoff + vma_pages(vma) >
dmabuf->size >> PAGE_SHIFT)
return -EINVAL;
- return dmabuf->ops->mmap(dmabuf, vma);
+ return __dma_buf_mmap(dmabuf, vma);
}
static loff_t dma_buf_llseek(struct file *file, loff_t offset, int whence)
@@ -326,6 +348,27 @@ static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf)
return 0;
}
+static long dma_buf_info(struct dma_buf *dmabuf, void __user *uarg)
+{
+ struct dma_buf_info arg;
+
+ if (copy_from_user(&arg, uarg, sizeof(arg)))
+ return -EFAULT;
+
+ switch (arg.param) {
+ case DMA_BUF_INFO_MAP_INFO:
+ arg.value = dmabuf->map_info;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (copy_to_user(uarg, &arg, sizeof(arg)))
+ return -EFAULT;
+
+ return 0;
+}
+
static long dma_buf_ioctl(struct file *file,
unsigned int cmd, unsigned long arg)
{
@@ -369,6 +412,9 @@ static long dma_buf_ioctl(struct file *file,
case DMA_BUF_SET_NAME_B:
return dma_buf_set_name(dmabuf, (const char __user *)arg);
+ case DMA_BUF_IOCTL_INFO:
+ return dma_buf_info(dmabuf, (void __user *)arg);
+
default:
return -ENOTTY;
}
@@ -530,6 +576,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
dmabuf->priv = exp_info->priv;
dmabuf->ops = exp_info->ops;
dmabuf->size = exp_info->size;
+ dmabuf->map_info = exp_info->map_info;
dmabuf->exp_name = exp_info->exp_name;
dmabuf->owner = exp_info->owner;
spin_lock_init(&dmabuf->name_lock);
@@ -1245,10 +1292,6 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma,
if (WARN_ON(!dmabuf || !vma))
return -EINVAL;
- /* check if buffer supports mmap */
- if (!dmabuf->ops->mmap)
- return -EINVAL;
-
/* check for offset overflow */
if (pgoff + vma_pages(vma) < pgoff)
return -EOVERFLOW;
@@ -1262,7 +1305,7 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma,
vma_set_file(vma, dmabuf->file);
vma->vm_pgoff = pgoff;
- return dmabuf->ops->mmap(dmabuf, vma);
+ return __dma_buf_mmap(dmabuf, vma);
}
EXPORT_SYMBOL_NS_GPL(dma_buf_mmap, DMA_BUF);
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 71731796c8c3..37923c8d5c24 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -23,6 +23,8 @@
#include <linux/dma-fence.h>
#include <linux/wait.h>
+#include <uapi/linux/dma-buf.h>
+
struct device;
struct dma_buf;
struct dma_buf_attachment;
@@ -307,6 +309,13 @@ struct dma_buf {
*/
size_t size;
+ /**
+ * @map_info:
+ *
+ * CPU mapping/coherency information for the buffer.
+ */
+ enum dma_buf_map_info map_info;
+
/**
* @file:
*
@@ -533,6 +542,7 @@ struct dma_buf_attachment {
* @ops: Attach allocator-defined dma buf ops to the new buffer
* @size: Size of the buffer - invariant over the lifetime of the buffer
* @flags: mode flags for the file
+ * @map_info: CPU mapping/coherency information for the buffer
* @resv: reservation-object, NULL to allocate default one
* @priv: Attach private data of allocator to this buffer
*
@@ -545,6 +555,7 @@ struct dma_buf_export_info {
const struct dma_buf_ops *ops;
size_t size;
int flags;
+ enum dma_buf_map_info map_info;
struct dma_resv *resv;
void *priv;
};
diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
index b1523cb8ab30..07b403ffdb43 100644
--- a/include/uapi/linux/dma-buf.h
+++ b/include/uapi/linux/dma-buf.h
@@ -85,6 +85,72 @@ struct dma_buf_sync {
#define DMA_BUF_NAME_LEN 32
+/**
+ * enum dma_buf_map_info - CPU mapping info
+ *
+ * This enum describes coherency of a userspace mapping of the dmabuf.
+ *
+ * Importing devices should check dma_buf::map_info flag and reject an
+ * import if unsupported. For example, if the exporting device uses
+ * @DMA_BUF_COHERENT_CACHED but the importing device does not support
+ * CPU cache coherency, the dma-buf import should fail.
+ */
+enum dma_buf_map_info {
+ /**
+ * @DMA_BUF_MAP_INCOHERENT: CPU mapping is incoherent.
+ *
+ * Use of DMA_BUF_IOCTL_SYNC is required for CPU managed coherenency.
+ */
+ DMA_BUF_MAP_INCOHERENT,
+
+ /**
+ * @DMA_BUF_COHERENT_WC: CPU mapping is coherent but not cached.
+ *
+ * A cpu mmap'ing is coherent, and DMA_BUF_IOCTL_SYNC is not required.
+ * However fences may be still required for synchronizing access. Ie.
+ * coherency can only be relied upon by an explicit-fencing userspace.
+ * An implicit-sync userspace must still use DMA_BUF_IOCTL_SYNC.
+ *
+ * The cpu mapping is writecombine.
+ */
+ DMA_BUF_COHERENT_WC,
+
+ /**
+ * @DMA_BUF_COHERENT_CACHED: CPU mapping is coherent and CPU cached.
+ *
+ * A cpu mmap'ing is coherent, and DMA_BUF_IOCTL_SYNC is not required.
+ * However fences may be still required for synchronizing access. Ie.
+ * coherency can only be relied upon by an explicit-fencing userspace.
+ * An implicit-sync userspace must still use DMA_BUF_IOCTL_SYNC.
+ *
+ * The cpu mapping is cached.
+ */
+ DMA_BUF_COHERENT_CACHED,
+};
+
+/**
+ * struct dma_buf_info - Query info about the buffer.
+ */
+struct dma_buf_info {
+
+#define DMA_BUF_INFO_MAP_INFO 1
+
+ /**
+ * @param: Which param to query
+ *
+ * DMA_BUF_INFO_MAP_INFO:
+ * Returns enum dma_buf_map_info, describing the coherency and
+ * caching of a CPU mapping of the buffer.
+ */
+ __u32 param;
+ __u32 pad;
+
+ /**
+ * @value: Return value of the query.
+ */
+ __u64 value;
+};
+
#define DMA_BUF_BASE 'b'
#define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
@@ -95,4 +161,6 @@ struct dma_buf_sync {
#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32)
#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64)
+#define DMA_BUF_IOCTL_INFO _IOWR(DMA_BUF_BASE, 2, struct dma_buf_info)
+
#endif