Re: [PATCH 06/20] vfio/cxl: Add UAPI for CXL Type-2 device passthrough

From: Dave Jiang

Date: Thu Mar 12 2026 - 17:07:12 EST




On 3/11/26 1:34 PM, mhonap@xxxxxxxxxx wrote:
> From: Manish Honap <mhonap@xxxxxxxxxx>
>
> CXL capabilities include:
> - hdm_count: Number of HDM decoders available
> - dpa_size: Total device memory (DPA) size in bytes
> - flags: COMMITTED, PRECOMMITTED
>
> This UAPI enables VMMs like QEMU to pass through CXL Type-2 devices
> (GPUs, accelerators) with coherent memory to VMs.
>
> Also add user-kernel API definitions for CXL Type-2 device passthrough.
> Document how VFIO_DEVICE_FLAGS_CXL relates to VFIO_DEVICE_FLAGS_PCI
> and VFIO_DEVICE_FLAGS_CAPS, and add field and flag descriptions
> for the CXL capability.
>
> Signed-off-by: Manish Honap <mhonap@xxxxxxxxxx>
> ---
> include/uapi/linux/vfio.h | 52 +++++++++++++++++++++++++++++++++++++++
> 1 file changed, 52 insertions(+)
>
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index ac2329f24141..7ec0f96cc2d9 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -215,6 +215,13 @@ struct vfio_device_info {
> #define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */
> #define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */
> #define VFIO_DEVICE_FLAGS_CDX (1 << 8) /* vfio-cdx device */
> +/*
> + * CXL Type-2 device (memory coherent; e.g. GPU, accelerator). When set,
> + * VFIO_DEVICE_FLAGS_PCI is also set (same device is a PCI device). The
> + * capability chain (VFIO_DEVICE_FLAGS_CAPS) contains VFIO_DEVICE_INFO_CAP_CXL
> + * describing HDM decoders, DPA size, and CXL-specific options.
> + */
> +#define VFIO_DEVICE_FLAGS_CXL (1 << 9) /* Device supports CXL */
> __u32 num_regions; /* Max region index + 1 */
> __u32 num_irqs; /* Max IRQ index + 1 */
> __u32 cap_offset; /* Offset within info struct of first cap */
> @@ -257,6 +264,39 @@ struct vfio_device_info_cap_pci_atomic_comp {
> __u32 reserved;
> };
>
> +/*
> + * VFIO_DEVICE_INFO_CAP_CXL - CXL Type-2 device capability
> + *
> + * Present in the device info capability chain when VFIO_DEVICE_FLAGS_CXL
> + * is set. Describes Host Managed Device Memory (HDM) layout and CXL
> + * memory options so that userspace (e.g. QEMU) can expose the CXL region
> + * and component registers correctly to the guest.
> + */
> +#define VFIO_DEVICE_INFO_CAP_CXL 6
> +struct vfio_device_info_cap_cxl {
> + struct vfio_info_cap_header header;
> + __u8 hdm_count; /* Number of HDM decoders */
> + __u8 hdm_regs_bar_index; /* PCI BAR containing HDM registers */
> + __u16 pad;
> + __u32 flags;
> +/* Decoder was committed by host firmware/BIOS */

I'm confused by COMMITTED vs PRECOMMITTED. Should it just say "Decoder is committed" here? Otherwise, what is the difference? Also, can you please explain the usage of COMMITTED vs PRECOMMITTED in the commit log? I.e., why does VFIO CXL need to know that a decoder is pre-committed?

DJ

> +#define VFIO_CXL_CAP_COMMITTED (1 << 0)
> +/*
> + * Memory was pre-committed (firmware-programmed); VMM need not allocate
> + * from CXL pool
> + */
> +#define VFIO_CXL_CAP_PRECOMMITTED (1 << 1)
> + __u64 hdm_regs_size; /* Size in bytes of HDM register block */
> + __u64 hdm_regs_offset; /* Byte offset within the BAR to the HDM decoder block */
> + __u64 dpa_size; /* Device Physical Address (DPA) size in bytes */
> + /*
> + * Region indices for the two CXL VFIO device regions.
> + * Avoids forcing userspace to scan all regions by type/subtype.
> + */
> + __u32 dpa_region_index; /* VFIO_REGION_SUBTYPE_CXL */
> + __u32 comp_regs_region_index; /* VFIO_REGION_SUBTYPE_CXL_COMP_REGS */
> +};
> +
> /**
> * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
> * struct vfio_region_info)
> @@ -370,6 +410,18 @@ struct vfio_region_info_cap_type {
> */
> #define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1)
>
> +/* 1e98 vendor PCI sub-types (CXL Consortium) */
> +/*
> + * CXL memory region. Use with region type
> + * (PCI_VENDOR_ID_CXL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE).
> + * DPA memory region (fault+zap mmap)
> + */
> +#define VFIO_REGION_SUBTYPE_CXL (1)
> +/*
> + * HDM decoder register emulation region (read/write only, no mmap).
> + */
> +#define VFIO_REGION_SUBTYPE_CXL_COMP_REGS (2)
> +
> /* sub-types for VFIO_REGION_TYPE_GFX */
> #define VFIO_REGION_SUBTYPE_GFX_EDID (1)
>