On Fri, 2015-03-13 at 19:07 +1100, Alexey Kardashevskiy wrote:
The existing IOMMU requires VFIO_IOMMU_ENABLE call to enable actual use
of the container (i.e. call DMA map/unmap) and this is where we check
the rlimit for locked pages. It assumes that only as much memory
as a default DMA window can be mapped. Every DMA map/unmap request will
do pinning/unpinning of physical pages.
New IOMMU will split physical pages pinning and TCE table update.
It will require guest pages to be registered first and consequent
map/unmap requests to work only with pre-registered memory.
For the default single window case this means that the entire guest
(instead of 2GB) needs to be pinned before using VFIO.
However when a huge DMA window is added, no additional pinning will be
required, otherwise it would be guest RAM + 2GB.
This advertises v2 SPAPR TCE IOMMU and restricts what the userspace
can do with v1 or v2 IOMMUs.
Signed-off-by: Alexey Kardashevskiy <aik@xxxxxxxxx>
---
Changes:
v6:
* enforced limitations imposed by the SPAPR TCE IOMMU version
---
drivers/vfio/vfio_iommu_spapr_tce.c | 18 +++++++++++++++++-
include/uapi/linux/vfio.h | 2 ++
2 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 9d240b4..e191438 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -95,6 +95,7 @@ struct tce_container {
bool enabled;
unsigned long locked_pages;
struct list_head mem_list;
+ bool v2;
};
struct tce_memory {
@@ -398,7 +399,7 @@ static void *tce_iommu_open(unsigned long arg)
{
struct tce_container *container;
- if (arg != VFIO_SPAPR_TCE_IOMMU) {
+ if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) {
pr_err("tce_vfio: Wrong IOMMU type\n");
return ERR_PTR(-EINVAL);
}
@@ -410,6 +411,8 @@ static void *tce_iommu_open(unsigned long arg)
mutex_init(&container->lock);
INIT_LIST_HEAD_RCU(&container->mem_list);
+ container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;
+
return container;
}
@@ -580,6 +583,7 @@ static long tce_iommu_ioctl(void *iommu_data,
case VFIO_CHECK_EXTENSION:
switch (arg) {
case VFIO_SPAPR_TCE_IOMMU:
+ case VFIO_SPAPR_TCE_v2_IOMMU:
ret = 1;
break;
default:
@@ -719,6 +723,9 @@ static long tce_iommu_ioctl(void *iommu_data,
case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: {
struct vfio_iommu_spapr_register_memory param;
+ if (!container->v2)
+ return -EPERM;
+
minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
size);
@@ -741,6 +748,9 @@ static long tce_iommu_ioctl(void *iommu_data,
case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: {
struct vfio_iommu_spapr_register_memory param;
+ if (!container->v2)
+ return -EPERM;
+
minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
size);
@@ -761,6 +771,9 @@ static long tce_iommu_ioctl(void *iommu_data,
return 0;
}
case VFIO_IOMMU_ENABLE:
+ if (container->v2)
+ return -EPERM;
+
mutex_lock(&container->lock);
ret = tce_iommu_enable(container);
mutex_unlock(&container->lock);
@@ -768,6 +781,9 @@ static long tce_iommu_ioctl(void *iommu_data,
case VFIO_IOMMU_DISABLE:
+ if (container->v2)
+ return -EPERM;
+
mutex_lock(&container->lock);
tce_iommu_disable(container);
mutex_unlock(&container->lock);
I wouldn't have guessed; nothing in the documentation suggests these
ioctls are deprecated in v2 (ie. please document). If the ioctl doesn't
exist for the IOMMU type, why not simply break and let it fall out at
-ENOTTY? Same for the above, v1 would have previously returned -ENOTTY
for those ioctls, why change to -EPERM?