Re: [PATCH RFC v1 16/20] KVM: x86: Decode REX2 prefix in the emulator
From: Chang S. Bae
Date: Mon Nov 17 2025 - 15:02:32 EST
On 11/13/2025 3:30 PM, Chang S. Bae wrote:
On 11/11/2025 9:55 AM, Paolo Bonzini wrote:
On 11/10/25 19:01, Chang S. Bae wrote:
case 0x40 ... 0x4f: /* REX */
if (mode != X86EMUL_MODE_PROT64)
goto done_prefixes;
+ if (ctxt->rex_prefix == REX2_PREFIX)
+ break;
ctxt->rex_prefix = REX_PREFIX;
ctxt->rex.raw = 0x0f & ctxt->b;
continue;
+ case 0xd5: /* REX2 */
+ if (mode != X86EMUL_MODE_PROT64)
+ goto done_prefixes;
[...]
+ if (ctxt->rex_prefix == REX2_PREFIX &&
+ ctxt->rex.bits.m0 == 0)
+ break;
+ ctxt->rex_prefix = REX2_PREFIX;
+ ctxt->rex.raw = insn_fetch(u8, ctxt);
+ continue;
After REX2 always comes the main opcode byte, so you can "goto
done_prefixes" here. Or even jump here already; in pseudocode:
ctxt->b = insn_fetch(u8, ctxt);
if (rex2 & REX_M)
goto decode_twobyte;
else
goto decode_onebyte;
Yes, agreed. I think this makes the control flow more explicit.
While rebasing onto your VEX series, I noticed a couple of missings:
(1) Jumping directly to the decode path skips the ctxt->op_bytes
setup.
(2) It also removes the logic that detects the invalid sequence:
REX2->REX (unless intentional).
Perhaps it makes sense to simply continue prefix parsing. Then, at
'done_prefixes', we can check the M bit next to the W-bit check and jump
to the two-byte decode path.
I’ve attached a revised diff on top of the VEX series.diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 1b3da3ba26b8..3a66741b6c8c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -245,6 +245,7 @@ enum {
REX_X = 2,
REX_R = 4,
REX_W = 8,
+ REX_M = 0x80,
};
static void writeback_registers(struct x86_emulate_ctxt *ctxt)
@@ -4849,6 +4850,18 @@ static int x86_decode_avx(struct x86_emulate_ctxt *ctxt,
return rc;
}
+static inline bool rex2_invalid(struct x86_emulate_ctxt *ctxt)
+{
+ const struct x86_emulate_ops *ops = ctxt->ops;
+ u64 xcr = 0;
+
+ return ctxt->rex_prefix == REX_PREFIX ||
+ (ctxt->rex_prefix == REX2_PREFIX && !(ctxt->rex_bits & REX_M)) ||
+ !(ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE) ||
+ ops->get_xcr(ctxt, 0, &xcr) ||
+ !(xcr & XFEATURE_MASK_APX);
+}
+
int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type)
{
int rc = X86EMUL_CONTINUE;
@@ -4902,7 +4915,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
ctxt->op_bytes = def_op_bytes;
ctxt->ad_bytes = def_ad_bytes;
- /* Legacy prefixes. */
+ /* Legacy and REX/REX2 prefixes. */
for (;;) {
switch (ctxt->b = insn_fetch(u8, ctxt)) {
case 0x66: /* operand-size override */
@@ -4945,9 +4958,23 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
case 0x40 ... 0x4f: /* REX */
if (mode != X86EMUL_MODE_PROT64)
goto done_prefixes;
+ if (ctxt->rex_prefix == REX2_PREFIX) {
+ opcode = ud;
+ goto done_modrm;
+ }
ctxt->rex_prefix = REX_PREFIX;
ctxt->rex_bits = ctxt->b & 0xf;
continue;
+ case 0xd5: /* REX2 */
+ if (mode != X86EMUL_MODE_PROT64)
+ goto done_prefixes;
+ if (rex2_invalid(ctxt)) {
+ opcode = ud;
+ goto done_modrm;
+ }
+ ctxt->rex_prefix = REX2_PREFIX;
+ ctxt->rex_bits = insn_fetch(u8, ctxt);
+ continue;
case 0xf0: /* LOCK */
ctxt->lock_prefix = 1;
break;
@@ -4970,6 +4997,9 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
if (ctxt->rex_bits & REX_W)
ctxt->op_bytes = 8;
+ if (ctxt->rex_bits & REX_M)
+ goto decode_twobytes;
+
/* Opcode byte(s). */
if (ctxt->b == 0xc4 || ctxt->b == 0xc5) {
/* VEX or LDS/LES */
@@ -4987,8 +5017,9 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
goto done;
} else if (ctxt->b == 0x0f) {
/* Two- or three-byte opcode */
- ctxt->opcode_len = 2;
ctxt->b = insn_fetch(u8, ctxt);
+decode_twobytes:
+ ctxt->opcode_len = 2;
opcode = twobyte_table[ctxt->b];
/* 0F_38 opcode map */