[PATCH 02/14] KVM: x86: Open code handling of completed MMIO reads in emulator_read_write()

From: Sean Christopherson

Date: Tue Feb 24 2026 - 20:22:17 EST


Open code the handling of completed MMIO reads instead of using an ops
hook, as burying the logic behind a (likely RETPOLINE'd) indirect call
with an unintuitive name makes relatively straightforward code hard
to comprehend.

Opportunistically add comments to explain the dependencies between the
emulator's mem_read cache and the MMIO read completion logic, as it's very
easy to overlook the cache's role in getting the read data into the
correct destination.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
arch/x86/kvm/emulate.c | 13 +++++++++++++
arch/x86/kvm/x86.c | 33 ++++++++++++++++-----------------
2 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c8e292e9a24d..70850e591350 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1297,12 +1297,25 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt,
int rc;
struct read_cache *mc = &ctxt->mem_read;

+ /*
+ * If the read gets a cache hit, simply copy the value from the cache.
+ * A "hit" here means that there is unused data in the cache, i.e. when
+ * re-emulating an instruction to complete a userspace exit, KVM relies
+ * on "no decode" to ensure the instruction is re-emulated in the same
+ * sequence, so that multiple reads are fulfilled in the correct order.
+ */
if (mc->pos < mc->end)
goto read_cached;

if (KVM_EMULATOR_BUG_ON((mc->end + size) >= sizeof(mc->data), ctxt))
return X86EMUL_UNHANDLEABLE;

+ /*
+ * Route all reads to the cache. This allows @dest to be an on-stack
+ * variable without triggering use-after-free if KVM needs to exit to
+ * userspace to handle an MMIO read (the MMIO fragment will point at
+ * the current location in the cache).
+ */
rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
&ctxt->exception);
if (rc != X86EMUL_CONTINUE)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ff3a6f86973f..8b1f02cc8196 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8109,8 +8109,6 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
}

struct read_write_emulator_ops {
- int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
- int bytes);
int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes);
int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -8120,18 +8118,6 @@ struct read_write_emulator_ops {
bool write;
};

-static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
-{
- if (vcpu->mmio_read_completed) {
- trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
- vcpu->mmio_fragments[0].gpa, val);
- vcpu->mmio_read_completed = 0;
- return 1;
- }
-
- return 0;
-}
-
static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes)
{
@@ -8167,7 +8153,6 @@ static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
}

static const struct read_write_emulator_ops read_emultor = {
- .read_write_prepare = read_prepare,
.read_write_emulate = read_emulate,
.read_write_mmio = vcpu_mmio_read,
.read_write_exit_mmio = read_exit_mmio,
@@ -8250,9 +8235,23 @@ static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
if (WARN_ON_ONCE((bytes > 8u || !ops->write) && object_is_on_stack(val)))
return X86EMUL_UNHANDLEABLE;

- if (ops->read_write_prepare &&
- ops->read_write_prepare(vcpu, val, bytes))
+ /*
+ * If the read was already completed via a userspace MMIO exit, there's
+ * nothing left to do except trace the MMIO read. When completing MMIO
+ * reads, KVM re-emulates the instruction to propagate the value into
+ * the correct destination, e.g. into the correct register, but the
+ * value itself has already been copied to the read cache.
+ *
+ * Note! This is *tightly* coupled to read_emulated() satisfying reads
+ * from the emulator's mem_read cache, so that the MMIO fragment data
+ * is copied to the correct chunk of the correct operand.
+ */
+ if (!ops->write && vcpu->mmio_read_completed) {
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
+ vcpu->mmio_fragments[0].gpa, val);
+ vcpu->mmio_read_completed = 0;
return X86EMUL_CONTINUE;
+ }

vcpu->mmio_nr_fragments = 0;

--
2.53.0.414.gf7e9f6c205-goog