Re: [PATCH 13/20] vfio/cxl: Introduce HDM decoder register emulation framework

From: Dave Jiang

Date: Fri Mar 13 2026 - 15:06:48 EST




On 3/11/26 1:34 PM, mhonap@xxxxxxxxxx wrote:
> From: Manish Honap <mhonap@xxxxxxxxxx>
>
> Introduce an emulation framework to handle CXL MMIO register emulation
> for CXL devices passed through to a VM.
>
> A single compact __le32 array (comp_reg_virt) covers only the HDM
> decoder register block (hdm_reg_size bytes, typically 256-512 bytes).
>
> A new VFIO device region VFIO_REGION_SUBTYPE_CXL_COMP_REGS exposes
> this array to userspace (QEMU) as a read-write region:
> - Reads return the emulated state (comp_reg_virt[])
> - Writes go through the HDM register write handlers and are
> forwarded to hardware where appropriate
>
> QEMU attaches a notify_change callback to this region. When the
> COMMIT bit is written in a decoder CTRL register the callback
> reads the BASE_LO/HI from the same region fd (emulated state) and
> maps the DPA MemoryRegion at the correct GPA in system_memory.
>
> Co-developed-by: Zhi Wang <zhiw@xxxxxxxxxx>
> Signed-off-by: Zhi Wang <zhiw@xxxxxxxxxx>
> Signed-off-by: Manish Honap <mhonap@xxxxxxxxxx>
> ---
> drivers/vfio/pci/Makefile | 2 +-
> drivers/vfio/pci/cxl/vfio_cxl_core.c | 36 ++-
> drivers/vfio/pci/cxl/vfio_cxl_emu.c | 366 +++++++++++++++++++++++++++
> drivers/vfio/pci/cxl/vfio_cxl_priv.h | 41 +++
> drivers/vfio/pci/vfio_pci_priv.h | 7 +
> 5 files changed, 450 insertions(+), 2 deletions(-)
> create mode 100644 drivers/vfio/pci/cxl/vfio_cxl_emu.c
>
> diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
> index ecb0eacbc089..bef916495eae 100644
> --- a/drivers/vfio/pci/Makefile
> +++ b/drivers/vfio/pci/Makefile
> @@ -1,7 +1,7 @@
> # SPDX-License-Identifier: GPL-2.0-only
>
> vfio-pci-core-y := vfio_pci_core.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
> -vfio-pci-core-$(CONFIG_VFIO_CXL_CORE) += cxl/vfio_cxl_core.o
> +vfio-pci-core-$(CONFIG_VFIO_CXL_CORE) += cxl/vfio_cxl_core.o cxl/vfio_cxl_emu.o
> vfio-pci-core-$(CONFIG_VFIO_PCI_ZDEV_KVM) += vfio_pci_zdev.o
> vfio-pci-core-$(CONFIG_VFIO_PCI_DMABUF) += vfio_pci_dmabuf.o
> obj-$(CONFIG_VFIO_PCI_CORE) += vfio-pci-core.o
> diff --git a/drivers/vfio/pci/cxl/vfio_cxl_core.c b/drivers/vfio/pci/cxl/vfio_cxl_core.c
> index 03846bd11c8a..d2401871489d 100644
> --- a/drivers/vfio/pci/cxl/vfio_cxl_core.c
> +++ b/drivers/vfio/pci/cxl/vfio_cxl_core.c
> @@ -45,6 +45,7 @@ static int vfio_cxl_create_device_state(struct vfio_pci_core_device *vdev,
> cxl = vdev->cxl;
> cxl->dvsec = dvsec;
> cxl->dpa_region_idx = -1;
> + cxl->comp_reg_region_idx = -1;
>
> pci_read_config_word(pdev, dvsec + CXL_DVSEC_CAPABILITY_OFFSET,
> &cap_word);
> @@ -124,6 +125,10 @@ static int vfio_cxl_setup_regs(struct vfio_pci_core_device *vdev)
> cxl->comp_reg_offset = bar_offset;
> cxl->comp_reg_size = CXL_COMPONENT_REG_BLOCK_SIZE;
>
> + ret = vfio_cxl_setup_virt_regs(vdev);
> + if (ret)
> + return ret;
> +
> return 0;
> }
>
> @@ -281,12 +286,14 @@ void vfio_pci_cxl_detect_and_init(struct vfio_pci_core_device *vdev)
>
> ret = vfio_cxl_create_region_helper(vdev, SZ_256M);
> if (ret)
> - goto failed;
> + goto regs_failed;
>
> cxl->precommitted = true;
>
> return;
>
> +regs_failed:
> + vfio_cxl_clean_virt_regs(vdev);
> failed:
> devm_kfree(&pdev->dev, vdev->cxl);
> vdev->cxl = NULL;
> @@ -299,6 +306,7 @@ void vfio_pci_cxl_cleanup(struct vfio_pci_core_device *vdev)
> if (!cxl || !cxl->region)
> return;
>
> + vfio_cxl_clean_virt_regs(vdev);
> vfio_cxl_destroy_cxl_region(vdev);
> }
>
> @@ -409,6 +417,32 @@ void vfio_cxl_reactivate_region(struct vfio_pci_core_device *vdev)
>
> if (!cxl)
> return;
> +
> + /*
> + * Re-initialise the emulated HDM comp_reg_virt[] from hardware.
> + * After FLR the decoder registers read as zero; mirror that in
> + * the emulated state so QEMU sees a clean slate.
> + */
> + vfio_cxl_reinit_comp_regs(vdev);
> +
> + /*
> + * Only re-enable the DPA mmap if the hardware has actually
> + * re-committed decoder 0 after FLR. Read the COMMITTED bit from the
> + * freshly-re-snapshotted comp_reg_virt[] so we check the post-FLR
> + * hardware state, not stale pre-reset state.
> + *
> + * If COMMITTED is 0 (slow firmware re-commit path), leave
> + * region_active=false. Guest faults will return VM_FAULT_SIGBUS
> + * until the decoder is re-committed and the region is re-enabled.
> + */
> + if (cxl->precommitted && cxl->comp_reg_virt) {
> + u32 ctrl = le32_to_cpu(cxl->comp_reg_virt[
> + CXL_HDM_DECODER0_CTRL_OFFSET(0) /
> + CXL_REG_SIZE_DWORD]);
> +
> + if (ctrl & CXL_HDM_DECODER_CTRL_COMMITTED_BIT)
> + WRITE_ONCE(cxl->region_active, true);
> + }
> }
>
> static ssize_t vfio_cxl_region_rw(struct vfio_pci_core_device *core_dev,
> diff --git a/drivers/vfio/pci/cxl/vfio_cxl_emu.c b/drivers/vfio/pci/cxl/vfio_cxl_emu.c
> new file mode 100644
> index 000000000000..d5603c80fe51
> --- /dev/null
> +++ b/drivers/vfio/pci/cxl/vfio_cxl_emu.c
> @@ -0,0 +1,366 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved
> + */
> +
> +#include <linux/bitops.h>
> +#include <linux/vfio_pci_core.h>
> +
> +#include "../vfio_pci_priv.h"
> +#include "vfio_cxl_priv.h"
> +
> +/*
> + * comp_reg_virt[] layout:
> + * Index 0..N correspond to 32-bit registers at byte offset 0..hdm_reg_size-4
> + * within the HDM decoder capability block.
> + *
> + * Register layout within the HDM block (CXL spec 8.2.5.19):
> + * 0x00: HDM Decoder Capability
> + * 0x04: HDM Decoder Global Control
> + * 0x08: HDM Decoder Global Status
> + * 0x0c: (reserved)
> + * For each decoder N (N=0..hdm_count-1), at base 0x10 + N*0x20:
> + * +0x00: BASE_LO
> + * +0x04: BASE_HI
> + * +0x08: SIZE_LO
> + * +0x0c: SIZE_HI
> + * +0x10: CTRL
> + * +0x14: TARGET_LIST_LO
> + * +0x18: TARGET_LIST_HI
> + * +0x1c: (reserved)
> + */
> +
> +static inline __le32 *hdm_reg_ptr(struct vfio_pci_cxl_state *cxl, u32 off)
> +{
> + /*
> + * off is byte offset within the HDM block; comp_reg_virt is indexed
> + * as an array of __le32.
> + */
> + return &cxl->comp_reg_virt[off / sizeof(__le32)];
> +}
> +
> +static ssize_t virt_hdm_rev_reg_write(struct vfio_pci_core_device *vdev,
> + const __le32 *val32, u64 offset, u64 size)
> +{
> + /* Discard writes on reserved registers. */
> + return size;
> +}
> +
> +static ssize_t hdm_decoder_n_lo_write(struct vfio_pci_core_device *vdev,
> + const __le32 *val32, u64 offset, u64 size)
> +{
> + u32 new_val = le32_to_cpu(*val32);
> +
> + if (WARN_ON_ONCE(size != CXL_REG_SIZE_DWORD))
> + return -EINVAL;
> +
> + /* Bit [27:0] are reserved. */
> + new_val &= ~CXL_HDM_DECODER_BASE_LO_RESERVED_MASK;
> +
> + *hdm_reg_ptr(vdev->cxl, offset) = cpu_to_le32(new_val);
> +
> + return size;
> +}
> +
> +static ssize_t hdm_decoder_global_ctrl_write(struct vfio_pci_core_device *vdev,
> + const __le32 *val32, u64 offset, u64 size)
Why offset? If the dispatch function already checked and confirmed this is the offset for the global ctrl register then there's no need to pass in the offset.

> +{
> + u32 hdm_decoder_global_cap;
> + u32 new_val = le32_to_cpu(*val32);
> +
> + if (WARN_ON_ONCE(size != CXL_REG_SIZE_DWORD))
> + return -EINVAL;
> +
> + /* Bit [31:2] are reserved. */
> + new_val &= ~CXL_HDM_DECODER_GLOBAL_CTRL_RESERVED_MASK;
> +
> + /* Poison On Decode Error Enable bit is 0 and RO if not support. */
> + hdm_decoder_global_cap = le32_to_cpu(*hdm_reg_ptr(vdev->cxl, 0));
> + if (!(hdm_decoder_global_cap & CXL_HDM_CAP_POISON_ON_DECODE_ERR_BIT))
> + new_val &= ~CXL_HDM_DECODER_GLOBAL_CTRL_POISON_EN_BIT;
> +
> + *hdm_reg_ptr(vdev->cxl, offset) = cpu_to_le32(new_val);
> +
> + return size;
> +}
> +
> +/*
> + * hdm_decoder_n_ctrl_write - Write handler for HDM decoder CTRL register.

If we are going to start with kdoc style comment, may as well finish the kdoc block and provide parameters and return values

> + *
> + * The COMMIT bit (bit 9) is the key: setting it requests the hardware to
> + * lock the decoder. The emulated COMMITTED bit (bit 10) mirrors COMMIT
> + * immediately to allow QEMU's notify_change to detect the transition and
> + * map/unmap the DPA MemoryRegion in the guest address space.
> + *
> + * Note: the actual hardware HDM decoder programming (writing the real
> + * BASE/SIZE with host physical addresses) happens in the QEMU notify_change
> + * callback BEFORE this write reaches the hardware. This ordering is
> + * correct because vfio_region_write() calls notify_change() first.
> + */
> +static ssize_t hdm_decoder_n_ctrl_write(struct vfio_pci_core_device *vdev,
> + const __le32 *val32, u64 offset, u64 size)
> +{
> + u32 hdm_decoder_global_cap;
> + u32 ro_mask = CXL_HDM_DECODER_CTRL_RO_BITS_MASK;
> + u32 rev_mask = CXL_HDM_DECODER_CTRL_RESERVED_MASK;
> + u32 new_val = le32_to_cpu(*val32);
> + u32 cur_val;
> +
> + if (WARN_ON_ONCE(size != CXL_REG_SIZE_DWORD))
> + return -EINVAL;
> +
> + cur_val = le32_to_cpu(*hdm_reg_ptr(vdev->cxl, offset));
> + if (cur_val & CXL_HDM_DECODER_CTRL_COMMIT_LOCK_BIT)
> + return size;
> +
> + hdm_decoder_global_cap = le32_to_cpu(*hdm_reg_ptr(vdev->cxl, 0));
> + ro_mask |= CXL_HDM_DECODER_CTRL_DEVICE_BITS_RO;
> + rev_mask |= CXL_HDM_DECODER_CTRL_DEVICE_RESERVED;
> + if (!(hdm_decoder_global_cap & CXL_HDM_CAP_UIO_SUPPORTED_BIT))
> + rev_mask |= CXL_HDM_DECODER_CTRL_UIO_RESERVED;
> +
> + new_val &= ~rev_mask;
> + cur_val &= ro_mask;
> + new_val = (new_val & ~ro_mask) | cur_val;
> +
> + /*
> + * Mirror COMMIT → COMMITTED immediately in the emulated state.
> + * QEMU's notify_change (called before this write reaches hardware)
> + * reads COMMITTED from the region fd to detect commit transitions.
> + */
> + if (new_val & CXL_HDM_DECODER_CTRL_COMMIT_BIT)
> + new_val |= CXL_HDM_DECODER_CTRL_COMMITTED_BIT;
> + else
> + new_val &= ~CXL_HDM_DECODER_CTRL_COMMITTED_BIT;
> +
> + *hdm_reg_ptr(vdev->cxl, offset) = cpu_to_le32(new_val);
> +
> + return size;
> +}
> +
> +/*
> + * Dispatch table for COMP_REGS region writes. Indexed by byte offset within
> + * the HDM decoder block. Returns the appropriate write handler.
> + *
> + * Layout:
> + * 0x00 HDM Decoder Capability (RO)
> + * 0x04 HDM Global Control (RW with reserved masking)
> + * 0x08 HDM Global Status (RO)
> + * 0x0c (reserved) (ignored)
> + * Per decoder N, base = 0x10 + N*0x20:
> + * base+0x00 BASE_LO (RW, [27:0] reserved)
> + * base+0x04 BASE_HI (RW)
> + * base+0x08 SIZE_LO (RW, [27:0] reserved)
> + * base+0x0c SIZE_HI (RW)
> + * base+0x10 CTRL (RW, complex rules)
> + * base+0x14 TARGET_LIST_LO (ignored for Type-2)
> + * base+0x18 TARGET_LIST_HI (ignored for Type-2)
> + * base+0x1c (reserved) (ignored)
> + */
> +static ssize_t comp_regs_dispatch_write(struct vfio_pci_core_device *vdev,
> + u32 off, const __le32 *val32, u32 size)
> +{
> + struct vfio_pci_cxl_state *cxl = vdev->cxl;
> + u32 dec_base, dec_off;
> +
> + /* HDM Decoder Capability (0x00): RO */
> + if (off == 0x00)

define magic number

> + return size;
> +
> + /* HDM Global Control (0x04) */
> + if (off == CXL_HDM_DECODER_GLOBAL_CTRL_OFFSET)
> + return hdm_decoder_global_ctrl_write(vdev, val32, off, size);
> +
> + /* HDM Global Status (0x08): RO */
> + if (off == 0x08)

define magic number

> + return size;
> +
> + /* Per-decoder registers start at 0x10, stride 0x20 */
> + if (off < CXL_HDM_DECODER_FIRST_BLOCK_OFFSET)
> + return size; /* reserved gap */
> +
> + dec_base = CXL_HDM_DECODER_FIRST_BLOCK_OFFSET;
> + dec_off = (off - dec_base) % CXL_HDM_DECODER_BLOCK_STRIDE;

Need a check here to make sure offset is within the number of supported decoders.

> +
> + switch (dec_off) {
> + case CXL_HDM_DECODER_N_BASE_LOW_OFFSET: /* BASE_LO */
> + case CXL_HDM_DECODER_N_SIZE_LOW_OFFSET: /* SIZE_LO */
> + return hdm_decoder_n_lo_write(vdev, val32, off, size);
> + case CXL_HDM_DECODER_N_BASE_HIGH_OFFSET: /* BASE_HI */
> + case CXL_HDM_DECODER_N_SIZE_HIGH_OFFSET: /* SIZE_HI */
> + /* Full 32-bit write, no reserved bits */
> + *hdm_reg_ptr(cxl, off) = *val32;
> + return size;
> + case CXL_HDM_DECODER_N_CTRL_OFFSET: /* CTRL */
> + return hdm_decoder_n_ctrl_write(vdev, val32, off, size);
> + case CXL_HDM_DECODER_N_TARGET_LIST_LOW_OFFSET:
> + case CXL_HDM_DECODER_N_TARGET_LIST_HIGH_OFFSET:
> + case CXL_HDM_DECODER_N_REV_OFFSET:
> + return virt_hdm_rev_reg_write(vdev, val32, off, size);
> + default:
> + return size;
> + }
> +}
> +
> +/*
> + * vfio_cxl_comp_regs_rw - regops rw handler for VFIO_REGION_SUBTYPE_CXL_COMP_REGS.
> + *
> + * Reads return the emulated HDM state (comp_reg_virt[]).
> + * Writes go through comp_regs_dispatch_write() for bit-field enforcement.
> + * Only 4-byte aligned 4-byte accesses are supported (hardware requirement).
> + */
> +static ssize_t vfio_cxl_comp_regs_rw(struct vfio_pci_core_device *vdev,
> + char __user *buf, size_t count,
> + loff_t *ppos, bool iswrite)
> +{
> + struct vfio_pci_cxl_state *cxl = vdev->cxl;
> + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
> + size_t done = 0;
> +
> + if (!count)
> + return 0;
> +
> + /* Clamp to region size */
> + if (pos >= cxl->hdm_reg_size)
> + return -EINVAL;
> + count = min(count, (size_t)(cxl->hdm_reg_size - pos));
> +
> + while (done < count) {
> + u32 sz = min_t(u32, CXL_REG_SIZE_DWORD, count - done);
> + u32 off = pos + done;
> + __le32 v;
> +
> + /* Enforce 4-byte alignment */
> + if (sz < CXL_REG_SIZE_DWORD || (off & 0x3))
> + return done ? (ssize_t)done : -EINVAL;
> +
> + if (iswrite) {
> + if (copy_from_user(&v, buf + done, sizeof(v)))
> + return done ? (ssize_t)done : -EFAULT;
> + comp_regs_dispatch_write(vdev, off, &v, sizeof(v));
> + } else {
> + v = *hdm_reg_ptr(cxl, off);
> + if (copy_to_user(buf + done, &v, sizeof(v)))
> + return done ? (ssize_t)done : -EFAULT;
> + }
> + done += sizeof(v);
> + }
> +
> + *ppos += done;
> + return done;
> +}
> +
> +static void vfio_cxl_comp_regs_release(struct vfio_pci_core_device *vdev,
> + struct vfio_pci_region *region)
> +{
> + /* comp_reg_virt is freed in vfio_cxl_clean_virt_regs(), not here. */
> +}
> +
> +static const struct vfio_pci_regops vfio_cxl_comp_regs_ops = {
> + .rw = vfio_cxl_comp_regs_rw,
> + .release = vfio_cxl_comp_regs_release,
> +};
> +
> +/*
> + * vfio_cxl_setup_virt_regs - Allocate emulated HDM register state.
> + *
> + * Allocates comp_reg_virt as a compact __le32 array covering only
> + * hdm_reg_size bytes of HDM decoder registers. The initial values
> + * are read from hardware via the BAR ioremap established by the caller.
> + *
> + * DVSEC state is accessed via vdev->vconfig (see the following patch).
> + */
> +int vfio_cxl_setup_virt_regs(struct vfio_pci_core_device *vdev)
> +{
> + struct vfio_pci_cxl_state *cxl = vdev->cxl;
> + size_t nregs;
> +
> + if (WARN_ON(!cxl->hdm_reg_size))
> + return -EINVAL;
> +
> + if (pci_resource_len(vdev->pdev, cxl->comp_reg_bar) <
> + cxl->comp_reg_offset + cxl->hdm_reg_offset + cxl->hdm_reg_size)
> + return -ENODEV;
> +
> + nregs = cxl->hdm_reg_size / sizeof(__le32);
> + cxl->comp_reg_virt = kcalloc(nregs, sizeof(__le32), GFP_KERNEL);
> + if (!cxl->comp_reg_virt)
> + return -ENOMEM;
> +
> + /* Establish persistent mapping; kept alive until vfio_cxl_clean_virt_regs(). */
> + cxl->hdm_iobase = ioremap(pci_resource_start(vdev->pdev, cxl->comp_reg_bar) +
> + cxl->comp_reg_offset + cxl->hdm_reg_offset,
> + cxl->hdm_reg_size);
> + if (!cxl->hdm_iobase) {
> + kfree(cxl->comp_reg_virt);
> + cxl->comp_reg_virt = NULL;
> + return -ENOMEM;
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * Called with memory_lock write side held (from vfio_cxl_reactivate_region).
> + * Uses the pre-established hdm_iobase, no ioremap() under the lock,
> + * which would deadlock on PREEMPT_RT where ioremap() can sleep.
> + */
> +void vfio_cxl_reinit_comp_regs(struct vfio_pci_core_device *vdev)
> +{
> + struct vfio_pci_cxl_state *cxl = vdev->cxl;
> + size_t i, nregs;
> +
> + if (!cxl || !cxl->comp_reg_virt || !cxl->hdm_iobase)
> + return;
> +
> + nregs = cxl->hdm_reg_size / sizeof(__le32);
> +
> + for (i = 0; i < nregs; i++)
> + cxl->comp_reg_virt[i] =
> + cpu_to_le32(readl(cxl->hdm_iobase + i * sizeof(__le32)));
> +}
> +
> +void vfio_cxl_clean_virt_regs(struct vfio_pci_core_device *vdev)
> +{
> + struct vfio_pci_cxl_state *cxl = vdev->cxl;
> +
> + if (cxl->hdm_iobase) {
> + iounmap(cxl->hdm_iobase);
> + cxl->hdm_iobase = NULL;
> + }
> + kfree(cxl->comp_reg_virt);
> + cxl->comp_reg_virt = NULL;
> +}
> +
> +/*
> + * vfio_cxl_register_comp_regs_region - Register the COMP_REGS device region.
> + *
> + * Exposes the emulated HDM decoder register state as a VFIO device region
> + * with type VFIO_REGION_SUBTYPE_CXL_COMP_REGS. QEMU attaches a
> + * notify_change callback to this region to intercept HDM COMMIT writes
> + * and map the DPA MemoryRegion at the appropriate GPA.
> + *
> + * The region is read+write only (no mmap) to ensure all accesses pass
> + * through comp_regs_dispatch_write() for proper bit-field enforcement.
> + */
> +int vfio_cxl_register_comp_regs_region(struct vfio_pci_core_device *vdev)
> +{
> + struct vfio_pci_cxl_state *cxl = vdev->cxl;
> + u32 flags = VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE;
> + int ret;
> +
> + if (!cxl || !cxl->comp_reg_virt)
> + return -ENODEV;
> +
> + ret = vfio_pci_core_register_dev_region(vdev,
> + PCI_VENDOR_ID_CXL |
> + VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
> + VFIO_REGION_SUBTYPE_CXL_COMP_REGS,
> + &vfio_cxl_comp_regs_ops,
> + cxl->hdm_reg_size, flags, cxl);
> + if (!ret)
> + cxl->comp_reg_region_idx = vdev->num_regions - 1;
> +
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(vfio_cxl_register_comp_regs_region);
> diff --git a/drivers/vfio/pci/cxl/vfio_cxl_priv.h b/drivers/vfio/pci/cxl/vfio_cxl_priv.h
> index b870926bfb19..4f2637874e9d 100644
> --- a/drivers/vfio/pci/cxl/vfio_cxl_priv.h
> +++ b/drivers/vfio/pci/cxl/vfio_cxl_priv.h
> @@ -25,14 +25,51 @@ struct vfio_pci_cxl_state {
> size_t hdm_reg_size;
> resource_size_t comp_reg_offset;
> size_t comp_reg_size;
> + __le32 *comp_reg_virt;
> + void __iomem *hdm_iobase;
> u32 hdm_count;
> int dpa_region_idx;
> + int comp_reg_region_idx;
> u16 dvsec;
> u8 comp_reg_bar;
> bool precommitted;
> bool region_active;
> };
>
> +/* Register access sizes */
> +#define CXL_REG_SIZE_WORD 2
> +#define CXL_REG_SIZE_DWORD 4
> +
> +/* HDM Decoder - register offsets (CXL 2.0 8.2.5.19) */
> +#define CXL_HDM_DECODER_GLOBAL_CTRL_OFFSET 0x4
> +#define CXL_HDM_DECODER_FIRST_BLOCK_OFFSET 0x10
> +#define CXL_HDM_DECODER_BLOCK_STRIDE 0x20
> +#define CXL_HDM_DECODER_N_BASE_LOW_OFFSET 0x0
> +#define CXL_HDM_DECODER_N_BASE_HIGH_OFFSET 0x4
> +#define CXL_HDM_DECODER_N_SIZE_LOW_OFFSET 0x8
> +#define CXL_HDM_DECODER_N_SIZE_HIGH_OFFSET 0xc
> +#define CXL_HDM_DECODER_N_CTRL_OFFSET 0x10
> +#define CXL_HDM_DECODER_N_TARGET_LIST_LOW_OFFSET 0x14
> +#define CXL_HDM_DECODER_N_TARGET_LIST_HIGH_OFFSET 0x18
> +#define CXL_HDM_DECODER_N_REV_OFFSET 0x1c
> +
> +/* HDM Decoder Global Capability / Control - bit definitions */
> +#define CXL_HDM_CAP_POISON_ON_DECODE_ERR_BIT BIT(10)
> +#define CXL_HDM_CAP_UIO_SUPPORTED_BIT BIT(13)
> +
> +/* HDM Decoder N Control */
> +#define CXL_HDM_DECODER_CTRL_COMMIT_LOCK_BIT BIT(8)
> +#define CXL_HDM_DECODER_CTRL_COMMIT_BIT BIT(9)
> +#define CXL_HDM_DECODER_CTRL_COMMITTED_BIT BIT(10)
> +#define CXL_HDM_DECODER_CTRL_RO_BITS_MASK (BIT(10) | BIT(11))
> +#define CXL_HDM_DECODER_CTRL_RESERVED_MASK (BIT(15) | GENMASK(31, 28))
> +#define CXL_HDM_DECODER_CTRL_DEVICE_BITS_RO BIT(12)
> +#define CXL_HDM_DECODER_CTRL_DEVICE_RESERVED (GENMASK(19, 16) | GENMASK(23, 20))
> +#define CXL_HDM_DECODER_CTRL_UIO_RESERVED (BIT(14) | GENMASK(27, 24))
> +#define CXL_HDM_DECODER_BASE_LO_RESERVED_MASK GENMASK(27, 0)
> +#define CXL_HDM_DECODER_GLOBAL_CTRL_RESERVED_MASK GENMASK(31, 2)
> +#define CXL_HDM_DECODER_GLOBAL_CTRL_POISON_EN_BIT BIT(0)

Maybe the reg defines should go in include/cxl/regs.h? Or move shared definitions out of drivers/cxl/.

DJ

> +
> /*
> * CXL DVSEC for CXL Devices - register offsets within the DVSEC
> * (CXL 2.0+ 8.1.3).
> @@ -41,4 +78,8 @@ struct vfio_pci_cxl_state {
> #define CXL_DVSEC_CAPABILITY_OFFSET 0xa
> #define CXL_DVSEC_MEM_CAPABLE BIT(2)
>
> +int vfio_cxl_setup_virt_regs(struct vfio_pci_core_device *vdev);
> +void vfio_cxl_clean_virt_regs(struct vfio_pci_core_device *vdev);
> +void vfio_cxl_reinit_comp_regs(struct vfio_pci_core_device *vdev);
> +
> #endif /* __LINUX_VFIO_CXL_PRIV_H */
> diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
> index 8f440f9eaa0c..f8db9a05c033 100644
> --- a/drivers/vfio/pci/vfio_pci_priv.h
> +++ b/drivers/vfio/pci/vfio_pci_priv.h
> @@ -152,6 +152,8 @@ int vfio_cxl_register_cxl_region(struct vfio_pci_core_device *vdev);
> void vfio_cxl_unregister_cxl_region(struct vfio_pci_core_device *vdev);
> void vfio_cxl_zap_region_locked(struct vfio_pci_core_device *vdev);
> void vfio_cxl_reactivate_region(struct vfio_pci_core_device *vdev);
> +int vfio_cxl_register_comp_regs_region(struct vfio_pci_core_device *vdev);
> +void vfio_cxl_reinit_comp_regs(struct vfio_pci_core_device *vdev);
>
> #else
>
> @@ -173,6 +175,11 @@ static inline void
> vfio_cxl_zap_region_locked(struct vfio_pci_core_device *vdev) { }
> static inline void
> vfio_cxl_reactivate_region(struct vfio_pci_core_device *vdev) { }
> +static inline int
> +vfio_cxl_register_comp_regs_region(struct vfio_pci_core_device *vdev)
> +{ return 0; }
> +static inline void
> +vfio_cxl_reinit_comp_regs(struct vfio_pci_core_device *vdev) { }
>
> #endif /* CONFIG_VFIO_CXL_CORE */
>