[PATCH] vfio: align capability structures

From: Stefan Hajnoczi
Date: Thu Aug 03 2023 - 10:43:51 EST


The VFIO_DEVICE_GET_INFO, VFIO_DEVICE_GET_REGION_INFO, and
VFIO_IOMMU_GET_INFO ioctls fill in an info struct followed by capability
structs:

+------+---------+---------+-----+
| info | caps[0] | caps[1] | ... |
+------+---------+---------+-----+

Neither the info struct size nor the capability struct sizes are always
multiples of sizeof(u64), which can leave u64 fields in later capability
structs misaligned.

Userspace applications currently need to handle misalignment manually in
order to support CPU architectures and programming languages with strict
alignment requirements.

Make life easier for userspace by ensuring alignment in the kernel.
The new layout is as follows:

+------+---+---------+---------+---+-----+
| info | 0 | caps[0] | caps[1] | 0 | ... |
+------+---+---------+---------+---+-----+

In this example info and caps[1] have sizes that are not multiples of
sizeof(u64), so zero padding is added after each of them to align the
structure that follows.

Adding zero padding between structs does not break the uapi. The memory
layout is specified by the info.cap_offset and caps[i].next fields
filled in by the kernel. Applications use these field values to locate
structs and are therefore unaffected by the addition of zero padding.

Signed-off-by: Stefan Hajnoczi <stefanha@xxxxxxxxxx>
---
include/linux/vfio.h | 2 +-
drivers/gpu/drm/i915/gvt/kvmgt.c | 7 +++--
drivers/s390/cio/vfio_ccw_ops.c | 7 +++--
drivers/vfio/pci/vfio_pci_core.c | 14 ++++++---
drivers/vfio/vfio_iommu_type1.c | 7 +++--
drivers/vfio/vfio_main.c | 53 +++++++++++++++++++++++++++-----
6 files changed, 71 insertions(+), 19 deletions(-)

diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 2c137ea94a3e..ff0864e73cc3 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -272,7 +272,7 @@ struct vfio_info_cap {
struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
size_t size, u16 id,
u16 version);
-void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset);
+ssize_t vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset);

int vfio_info_add_capability(struct vfio_info_cap *caps,
struct vfio_info_cap_header *cap, size_t size);
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index de675d799c7d..9060e9c6ac7c 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1297,7 +1297,10 @@ static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd,
info.argsz = sizeof(info) + caps.size;
info.cap_offset = 0;
} else {
- vfio_info_cap_shift(&caps, sizeof(info));
+ ssize_t cap_offset = vfio_info_cap_shift(&caps, sizeof(info));
+ if (cap_offset < 0)
+ return cap_offset;
+
if (copy_to_user((void __user *)arg +
sizeof(info), caps.buf,
caps.size)) {
@@ -1305,7 +1308,7 @@ static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd,
kfree(sparse);
return -EFAULT;
}
- info.cap_offset = sizeof(info);
+ info.cap_offset = cap_offset;
}

kfree(caps.buf);
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index 5b53b94f13c7..63d5163376a5 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -361,13 +361,16 @@ static int vfio_ccw_mdev_get_region_info(struct vfio_ccw_private *private,
info->argsz = sizeof(*info) + caps.size;
info->cap_offset = 0;
} else {
- vfio_info_cap_shift(&caps, sizeof(*info));
+ ssize_t cap_offset = vfio_info_cap_shift(&caps, sizeof(*info));
+ if (cap_offset < 0)
+ return cap_offset;
+
if (copy_to_user((void __user *)arg + sizeof(*info),
caps.buf, caps.size)) {
kfree(caps.buf);
return -EFAULT;
}
- info->cap_offset = sizeof(*info);
+ info->cap_offset = cap_offset;
}

kfree(caps.buf);
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 20d7b69ea6ff..92c093b99187 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -966,12 +966,15 @@ static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
if (info.argsz < sizeof(info) + caps.size) {
info.argsz = sizeof(info) + caps.size;
} else {
- vfio_info_cap_shift(&caps, sizeof(info));
+ ssize_t cap_offset = vfio_info_cap_shift(&caps, sizeof(info));
+ if (cap_offset < 0)
+ return cap_offset;
+
if (copy_to_user(arg + 1, caps.buf, caps.size)) {
kfree(caps.buf);
return -EFAULT;
}
- info.cap_offset = sizeof(*arg);
+ info.cap_offset = cap_offset;
}

kfree(caps.buf);
@@ -1107,12 +1110,15 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev,
info.argsz = sizeof(info) + caps.size;
info.cap_offset = 0;
} else {
- vfio_info_cap_shift(&caps, sizeof(info));
+ ssize_t cap_offset = vfio_info_cap_shift(&caps, sizeof(info));
+ if (cap_offset < 0)
+ return cap_offset;
+
if (copy_to_user(arg + 1, caps.buf, caps.size)) {
kfree(caps.buf);
return -EFAULT;
}
- info.cap_offset = sizeof(*arg);
+ info.cap_offset = cap_offset;
}

kfree(caps.buf);
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index ebe0ad31d0b0..ab64b9e3ed7c 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2808,14 +2808,17 @@ static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
if (info.argsz < sizeof(info) + caps.size) {
info.argsz = sizeof(info) + caps.size;
} else {
- vfio_info_cap_shift(&caps, sizeof(info));
+ ssize_t cap_offset = vfio_info_cap_shift(&caps, sizeof(info));
+ if (cap_offset < 0)
+ return cap_offset;
+
if (copy_to_user((void __user *)arg +
sizeof(info), caps.buf,
caps.size)) {
kfree(caps.buf);
return -EFAULT;
}
- info.cap_offset = sizeof(info);
+ info.cap_offset = cap_offset;
}

kfree(caps.buf);
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index f0ca33b2e1df..4fc8698577a7 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -1171,8 +1171,18 @@ struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
{
void *buf;
struct vfio_info_cap_header *header, *tmp;
+ size_t header_offset;
+ size_t new_size;

- buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
+ /*
+ * Reserve extra space when the previous capability was not a multiple of
+ * the largest field size. This ensures that capabilities are properly
+ * aligned.
+ */
+ header_offset = ALIGN(caps->size, sizeof(u64));
+ new_size = header_offset + size;
+
+ buf = krealloc(caps->buf, new_size, GFP_KERNEL);
if (!buf) {
kfree(caps->buf);
caps->buf = NULL;
@@ -1181,10 +1191,10 @@ struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
}

caps->buf = buf;
- header = buf + caps->size;
+ header = buf + header_offset;

/* Eventually copied to user buffer, zero */
- memset(header, 0, size);
+ memset(buf + caps->size, 0, new_size - caps->size);

header->id = id;
header->version = version;
@@ -1193,20 +1203,47 @@ struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
for (tmp = buf; tmp->next; tmp = buf + tmp->next)
; /* nothing */

- tmp->next = caps->size;
- caps->size += size;
+ tmp->next = header_offset;
+ caps->size = new_size;

return header;
}
EXPORT_SYMBOL_GPL(vfio_info_cap_add);

-void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
+/*
+ * Shift the capability next fields by the u64-aligned offset at which the
+ * capability structures will start and prepend the zero padding to caps->buf.
+ * Returns the cap_offset, or -ENOMEM after freeing caps->buf.
+ */
+ssize_t vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
{
struct vfio_info_cap_header *tmp;
+ struct vfio_info_cap_header *next_tmp;
void *buf = (void *)caps->buf;
+ size_t pad = ALIGN(offset, sizeof(u64)) - offset;
+ size_t cap_offset = offset + pad;

- for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
- tmp->next += offset;
+ /* Shift the next fields to account for offset and pad */
+ for (tmp = buf; tmp->next; tmp = next_tmp) {
+ next_tmp = buf + tmp->next;
+ tmp->next += cap_offset;
+ }
+
+ /* Pad with zeroes so capabilities start with proper alignment */
+ buf = krealloc(caps->buf, caps->size + pad, GFP_KERNEL);
+ if (!buf) {
+ kfree(caps->buf);
+ caps->buf = NULL;
+ caps->size = 0;
+ return -ENOMEM;
+ }
+
+ memmove(buf + pad, buf, caps->size);
+ memset(buf, 0, pad);
+
+ caps->buf = buf;
+ caps->size += pad;
+ return cap_offset;
}
EXPORT_SYMBOL(vfio_info_cap_shift);

--
2.41.0